Update to compiler-rt 5.0.1.

author patrick <patrick@openbsd.org>

Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)

committer patrick <patrick@openbsd.org>

Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)
author patrick <patrick@openbsd.org>
Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)
committer patrick <patrick@openbsd.org>
Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)
diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt

index 44a660f..f0d3f50 100644 (file)
--- a/lib/libcompiler_rt/CMakeLists.txt
+++ b/lib/libcompiler_rt/CMakeLists.txt
@@ -13,6 +13,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
      "${CMAKE_SOURCE_DIR}/../../cmake/Modules")
    include(base-config-ix)
    include(CompilerRTUtils)
+
+  load_llvm_config()
+  construct_compiler_rt_default_triple()
+
    if(APPLE)
      include(CompilerRTDarwinUtils)
    endif()
@@ -38,7 +42,8 @@ set(GENERIC_SOURCES
    ashlti3.c
    ashrdi3.c
    ashrti3.c
-  clear_cache.c
+  bswapdi2.c
+  bswapsi2.c
    clzdi2.c
    clzsi2.c
    clzti2.c
@@ -62,11 +67,10 @@ set(GENERIC_SOURCES
    divti3.c
    divtf3.c
    divxc3.c
-  enable_execute_stack.c
-  eprintf.c
    extendsfdf2.c
    extendhfsf2.c
    ffsdi2.c
+  ffssi2.c
    ffsti2.c
    fixdfdi.c
    fixdfsi.c
@@ -128,6 +132,7 @@ set(GENERIC_SOURCES
    negvdi2.c
    negvsi2.c
    negvti2.c
+  os_version_check.c
    paritydi2.c
    paritysi2.c
    parityti2.c
@@ -160,20 +165,43 @@ set(GENERIC_SOURCES
    umodsi3.c
    umodti3.c)
  
-if(COMPILER_RT_SUPPORTS_ATOMIC_KEYWORD)
+set(GENERIC_TF_SOURCES
+  comparetf2.c
+  extenddftf2.c
+  extendsftf2.c
+  fixtfdi.c
+  fixtfsi.c
+  fixtfti.c
+  fixunstfdi.c
+  fixunstfsi.c
+  fixunstfti.c
+  floatditf.c
+  floatsitf.c
+  floattitf.c
+  floatunditf.c
+  floatunsitf.c
+  floatuntitf.c
+  multc3.c
+  trunctfdf2.c
+  trunctfsf2.c)
+
+option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
+  "Skip the atomic builtin (this may be needed if system headers are unavailable)"
+  Off)
+
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
    set(GENERIC_SOURCES
      ${GENERIC_SOURCES}
-    atomic.c)
+    emutls.c 
+    enable_execute_stack.c
+    eprintf.c)
  endif()
  
-set(MSVC_SOURCES
- divsc3.c
- divdc3.c
- divxc3.c
- mulsc3.c
- muldc3.c
- mulxc3.c)
-
+if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    atomic.c)
+endif()
  
  if(APPLE)
    set(GENERIC_SOURCES
@@ -186,18 +214,18 @@ if(APPLE)
      atomic_thread_fence.c)
  endif()
  
-if(NOT WIN32 OR MINGW)
-  set(GENERIC_SOURCES
-      ${GENERIC_SOURCES}
-      emutls.c)
-endif()
-
  if (HAVE_UNWIND_H)
    set(GENERIC_SOURCES
        ${GENERIC_SOURCES}
        gcc_personality_v0.c)
  endif ()
  
+if (NOT FUCHSIA)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    clear_cache.c)
+endif()
+
  if (NOT MSVC)
    set(x86_64_SOURCES
        x86_64/chkstk.S
@@ -254,15 +282,54 @@ else () # MSVC
        x86_64/floatdidf.c
        x86_64/floatdisf.c
        x86_64/floatdixf.c
-      ${MSVC_SOURCES})
+      ${GENERIC_SOURCES})
    set(x86_64h_SOURCES ${x86_64_SOURCES})
-  set(i386_SOURCES ${MSVC_SOURCES})
+  set(i386_SOURCES ${GENERIC_SOURCES})
    set(i686_SOURCES ${i386_SOURCES})
  endif () # if (NOT MSVC)
  
  set(arm_SOURCES
-  arm/adddf3vfp.S
-  arm/addsf3vfp.S
+  arm/bswapdi2.S
+  arm/bswapsi2.S
+  arm/clzdi2.S
+  arm/clzsi2.S
+  arm/comparesf2.S
+  arm/divmodsi4.S
+  arm/divsi3.S
+  arm/modsi3.S
+  arm/sync_fetch_and_add_4.S
+  arm/sync_fetch_and_add_8.S
+  arm/sync_fetch_and_and_4.S
+  arm/sync_fetch_and_and_8.S
+  arm/sync_fetch_and_max_4.S
+  arm/sync_fetch_and_max_8.S
+  arm/sync_fetch_and_min_4.S
+  arm/sync_fetch_and_min_8.S
+  arm/sync_fetch_and_nand_4.S
+  arm/sync_fetch_and_nand_8.S
+  arm/sync_fetch_and_or_4.S
+  arm/sync_fetch_and_or_8.S
+  arm/sync_fetch_and_sub_4.S
+  arm/sync_fetch_and_sub_8.S
+  arm/sync_fetch_and_umax_4.S
+  arm/sync_fetch_and_umax_8.S
+  arm/sync_fetch_and_umin_4.S
+  arm/sync_fetch_and_umin_8.S
+  arm/sync_fetch_and_xor_4.S
+  arm/sync_fetch_and_xor_8.S
+  arm/udivmodsi4.S
+  arm/udivsi3.S
+  arm/umodsi3.S
+  ${GENERIC_SOURCES})
+
+set(thumb1_SOURCES
+  arm/divsi3.S
+  arm/udivsi3.S
+  arm/comparesf2.S
+  arm/addsf3.S
+  ${GENERIC_SOURCES})
+
+set(arm_EABI_SOURCES
    arm/aeabi_cdcmp.S
    arm/aeabi_cdcmpeq_check_nan.c
    arm/aeabi_cfcmp.S
@@ -279,16 +346,21 @@ set(arm_SOURCES
    arm/aeabi_memmove.S
    arm/aeabi_memset.S
    arm/aeabi_uidivmod.S
-  arm/aeabi_uldivmod.S
-  arm/bswapdi2.S
-  arm/bswapsi2.S
-  arm/clzdi2.S
-  arm/clzsi2.S
-  arm/comparesf2.S
+  arm/aeabi_uldivmod.S)
+
+set(arm_Thumb1_JT_SOURCES
+  arm/switch16.S
+  arm/switch32.S
+  arm/switch8.S
+  arm/switchu8.S)
+set(arm_Thumb1_SjLj_EH_SOURCES
+  arm/restore_vfp_d8_d15_regs.S
+  arm/save_vfp_d8_d15_regs.S)
+set(arm_Thumb1_VFPv2_SOURCES
+  arm/adddf3vfp.S
+  arm/addsf3vfp.S
    arm/divdf3vfp.S
-  arm/divmodsi4.S
    arm/divsf3vfp.S
-  arm/divsi3.S
    arm/eqdf2vfp.S
    arm/eqsf2vfp.S
    arm/extendsfdf2vfp.S
@@ -308,67 +380,64 @@ set(arm_SOURCES
    arm/lesf2vfp.S
    arm/ltdf2vfp.S
    arm/ltsf2vfp.S
-  arm/modsi3.S
    arm/muldf3vfp.S
    arm/mulsf3vfp.S
    arm/nedf2vfp.S
    arm/negdf2vfp.S
    arm/negsf2vfp.S
    arm/nesf2vfp.S
-  arm/restore_vfp_d8_d15_regs.S
-  arm/save_vfp_d8_d15_regs.S
    arm/subdf3vfp.S
    arm/subsf3vfp.S
-  arm/switch16.S
-  arm/switch32.S
-  arm/switch8.S
-  arm/switchu8.S
-  arm/sync_fetch_and_add_4.S
-  arm/sync_fetch_and_add_8.S
-  arm/sync_fetch_and_and_4.S
-  arm/sync_fetch_and_and_8.S
-  arm/sync_fetch_and_max_4.S
-  arm/sync_fetch_and_max_8.S
-  arm/sync_fetch_and_min_4.S
-  arm/sync_fetch_and_min_8.S
-  arm/sync_fetch_and_nand_4.S
-  arm/sync_fetch_and_nand_8.S
-  arm/sync_fetch_and_or_4.S
-  arm/sync_fetch_and_or_8.S
-  arm/sync_fetch_and_sub_4.S
-  arm/sync_fetch_and_sub_8.S
-  arm/sync_fetch_and_umax_4.S
-  arm/sync_fetch_and_umax_8.S
-  arm/sync_fetch_and_umin_4.S
-  arm/sync_fetch_and_umin_8.S
-  arm/sync_fetch_and_xor_4.S
-  arm/sync_fetch_and_xor_8.S
-  arm/sync_synchronize.S
    arm/truncdfsf2vfp.S
-  arm/udivmodsi4.S
-  arm/udivsi3.S
-  arm/umodsi3.S
    arm/unorddf2vfp.S
-  arm/unordsf2vfp.S
-  ${GENERIC_SOURCES})
+  arm/unordsf2vfp.S)
+set(arm_Thumb1_icache_SOURCES
+  arm/sync_synchronize.S)
+set(arm_Thumb1_SOURCES
+  ${arm_Thumb1_JT_SOURCES}
+  ${arm_Thumb1_SjLj_EH_SOURCES}
+  ${arm_Thumb1_VFPv2_SOURCES}
+  ${arm_Thumb1_icache_SOURCES})
+
+if(MINGW)
+  set(arm_SOURCES
+      arm/aeabi_idivmod.S
+      arm/aeabi_ldivmod.S
+      arm/aeabi_uidivmod.S
+      arm/aeabi_uldivmod.S
+      divmoddi4.c
+      divmodsi4.c
+      divdi3.c
+      divsi3.c
+      fixdfdi.c
+      fixsfdi.c
+      fixunsdfdi.c
+      fixunssfdi.c
+      floatdidf.c
+      floatdisf.c
+      floatundidf.c
+      floatundisf.c
+      mingw_fixfloat.c
+      moddi3.c
+      udivmoddi4.c
+      udivmodsi4.c
+      udivsi3.c
+      umoddi3.c
+      emutls.c)
+elseif(NOT WIN32)
+  # TODO the EABI sources should only be added to EABI targets
+  set(arm_SOURCES
+    ${arm_SOURCES}
+    ${arm_EABI_SOURCES}
+    ${arm_Thumb1_SOURCES})
+
+  set(thumb1_SOURCES
+    ${thumb1_SOURCES}
+    ${arm_EABI_SOURCES})
+endif()
  
  set(aarch64_SOURCES
-  comparetf2.c
-  extenddftf2.c
-  extendsftf2.c
-  fixtfdi.c
-  fixtfsi.c
-  fixtfti.c
-  fixunstfdi.c
-  fixunstfsi.c
-  fixunstfti.c
-  floatditf.c
-  floatsitf.c
-  floatunditf.c
-  floatunsitf.c
-  multc3.c
-  trunctfdf2.c
-  trunctfsf2.c
+  ${GENERIC_TF_SOURCES}
    ${GENERIC_SOURCES})
  
  set(armhf_SOURCES ${arm_SOURCES})
@@ -378,14 +447,16 @@ set(armv7k_SOURCES ${arm_SOURCES})
  set(arm64_SOURCES ${aarch64_SOURCES})
  
  # macho_embedded archs
-set(armv6m_SOURCES ${GENERIC_SOURCES})
+set(armv6m_SOURCES ${thumb1_SOURCES})
  set(armv7m_SOURCES ${arm_SOURCES})
  set(armv7em_SOURCES ${arm_SOURCES})
  
  set(mips_SOURCES ${GENERIC_SOURCES})
  set(mipsel_SOURCES ${mips_SOURCES})
-set(mips64_SOURCES ${mips_SOURCES})
-set(mips64el_SOURCES ${mips_SOURCES})
+set(mips64_SOURCES ${GENERIC_TF_SOURCES}
+                   ${mips_SOURCES})
+set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
+                     ${mips_SOURCES})
  
  set(wasm32_SOURCES ${GENERIC_SOURCES})
  set(wasm64_SOURCES ${GENERIC_SOURCES})
@@ -398,26 +469,57 @@ if (APPLE)
    add_subdirectory(macho_embedded)
    darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
  else ()
-  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99)
+  set(BUILTIN_CFLAGS "")
+
+  append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
+
+  # These flags would normally be added to CMAKE_C_FLAGS by the llvm
+  # cmake step. Add them manually if this is a standalone build.
+  if(COMPILER_RT_STANDALONE_BUILD)
+    append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS)
+    if(NOT COMPILER_RT_DEBUG)
+      append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS)
+    endif()
+  endif()
+
+  set(BUILTIN_DEFS "")
+
+  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS)
  
    foreach (arch ${BUILTIN_SUPPORTED_ARCH})
      if (CAN_TARGET_${arch})
+      # NOTE: some architectures (e.g. i386) have multiple names.  Ensure that
+      # we catch them all.
+      set(_arch ${arch})
+      if("${arch}" STREQUAL "i686")
+        set(_arch "i386|i686")
+      endif()
+
        # Filter out generic versions of routines that are re-implemented in
        # architecture specific manner.  This prevents multiple definitions of the
        # same symbols, making the symbol selection non-deterministic.
        foreach (_file ${${arch}_SOURCES})
-        if (${_file} MATCHES ${arch}/*)
+        if (${_file} MATCHES ${_arch}/*)
            get_filename_component(_name ${_file} NAME)
            string(REPLACE ".S" ".c" _cname "${_name}")
            list(REMOVE_ITEM ${arch}_SOURCES ${_cname})
          endif ()
        endforeach ()
  
+      # Needed for clear_cache on debug mode, due to r7's usage in inline asm.
+      # Release mode already sets it via -O2/3, Debug mode doesn't.
+      if (${arch} STREQUAL "armhf")
+        list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
+      endif()
+
        add_compiler_rt_runtime(clang_rt.builtins
                                STATIC
                                ARCHS ${arch}
                                SOURCES ${${arch}_SOURCES}
-                              CFLAGS ${maybe_stdc99}
+                              DEFS ${BUILTIN_DEFS}
+                              CFLAGS ${BUILTIN_CFLAGS}
                                PARENT_TARGET builtins)
      endif ()
    endforeach ()
diff --git a/lib/libcompiler_rt/Makefile b/lib/libcompiler_rt/Makefile

index 3791940..a12a1fa 100644 (file)
--- a/lib/libcompiler_rt/Makefile
+++ b/lib/libcompiler_rt/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.10 2017/09/08 19:04:00 naddy Exp $
+# $OpenBSD: Makefile,v 1.11 2017/12/26 20:59:44 patrick Exp $
  
  .include <bsd.own.mk>
  
@@ -41,6 +41,8 @@ GEN_SRCS=     absvdi2 \
                 ashrdi3 \
                 ashrti3 \
                 atomic \
+               bswapdi2 \
+               bswapsi2 \
                 clear_cache \
                 clzdi2 \
                 clzsi2 \
@@ -71,6 +73,7 @@ GEN_SRCS=     absvdi2 \
                 extendsfdf2 \
                 extendhfsf2 \
                 ffsdi2 \
+               ffssi2 \
                 ffsti2 \
                 fixdfdi \
                 fixdfsi \
@@ -195,8 +198,10 @@ SRCS+=     comparetf2.c \
         fixunstfti.c \
         floatditf.c \
         floatsitf.c \
-       floatunsitf.c \
+       floattitf.c \
         floatunditf.c \
+       floatunsitf.c \
+       floatuntitf.c \
         multc3.c \
         trunctfdf2.c \
         trunctfsf2.c
@@ -220,8 +225,6 @@ SRCS+=      aeabi_cdcmp.S \
         aeabi_memset.S \
         aeabi_uidivmod.S \
         aeabi_uldivmod.S \
-       bswapdi2.S \
-       bswapsi2.S \
         switch16.S \
         switch32.S \
         switch8.S \
diff --git a/lib/libcompiler_rt/Makefile.mk b/lib/libcompiler_rt/Makefile.mk

deleted file mode 100644 (file)

index 00e2f53..0000000
--- a/lib/libcompiler_rt/Makefile.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#===- lib/builtins/Makefile.mk -----------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-
-# Add arch specific optimized implementations.
-SubDirs += i386 ppc x86_64 arm armv6m
-
-# Add ARM64 dir.
-SubDirs += arm64
-
-# Define the variables for this specific directory.
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o)
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/README.txt b/lib/libcompiler_rt/README.txt

index ad36e4e..e603dfa 100644 (file)
--- a/lib/libcompiler_rt/README.txt
+++ b/lib/libcompiler_rt/README.txt
@@ -45,6 +45,7 @@ si_int __ctzsi2(si_int a);  // count trailing zeros
  si_int __ctzdi2(di_int a);  // count trailing zeros
  si_int __ctzti2(ti_int a);  // count trailing zeros
  
+si_int __ffssi2(si_int a);  // find least significant 1 bit
  si_int __ffsdi2(di_int a);  // find least significant 1 bit
  si_int __ffsti2(ti_int a);  // find least significant 1 bit
  
@@ -56,8 +57,8 @@ si_int __popcountsi2(si_int a);  // bit population
  si_int __popcountdi2(di_int a);  // bit population
  si_int __popcountti2(ti_int a);  // bit population
  
-uint32_t __bswapsi2(uint32_t a);   // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a);   // a byteswapped, arm only
+uint32_t __bswapsi2(uint32_t a);   // a byteswapped
+uint64_t __bswapdi2(uint64_t a);   // a byteswapped
  
  // Integral arithmetic
  
diff --git a/lib/libcompiler_rt/adddf3.c b/lib/libcompiler_rt/adddf3.c

index 8b7aae0..c528e9e 100644 (file)
--- a/lib/libcompiler_rt/adddf3.c
+++ b/lib/libcompiler_rt/adddf3.c
@@ -15,8 +15,13 @@
  #define DOUBLE_PRECISION
  #include "fp_add_impl.inc"
  
-ARM_EABI_FNALIAS(dadd, adddf3)
-
  COMPILER_RT_ABI double __adddf3(double a, double b){
      return __addXf3__(a, b);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_dadd(double a, double b) {
+  return __adddf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/addsf3.c b/lib/libcompiler_rt/addsf3.c

index 0f5d6ea..fe57068 100644 (file)
--- a/lib/libcompiler_rt/addsf3.c
+++ b/lib/libcompiler_rt/addsf3.c
@@ -15,8 +15,13 @@
  #define SINGLE_PRECISION
  #include "fp_add_impl.inc"
  
-ARM_EABI_FNALIAS(fadd, addsf3)
-
  COMPILER_RT_ABI float __addsf3(float a, float b) {
      return __addXf3__(a, b);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_fadd(float a, float b) {
+  return __addsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/arm/Makefile.mk b/lib/libcompiler_rt/arm/Makefile.mk

deleted file mode 100644 (file)

index ed2e832..0000000
--- a/lib/libcompiler_rt/arm/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/arm/adddf3vfp.S b/lib/libcompiler_rt/arm/adddf3vfp.S

index f4c00a0..8e476ca 100644 (file)
--- a/lib/libcompiler_rt/arm/adddf3vfp.S
+++ b/lib/libcompiler_rt/arm/adddf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vadd.f64 d0, d0, d1
+#else
         vmov    d6, r0, r1              // move first param from r0/r1 pair into d6
         vmov    d7, r2, r3              // move second param from r2/r3 pair into d7
         vadd.f64 d6, d6, d7             
         vmov    r0, r1, d6              // move result back to r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__adddf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/addsf3.S b/lib/libcompiler_rt/arm/addsf3.S

new file mode 100644 (file)

index 0000000..362b5c1
--- /dev/null
+++ b/lib/libcompiler_rt/arm/addsf3.S
@@ -0,0 +1,277 @@
+/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 (single precision floating pointer number
+ * addition with the IEEE-754 default rounding (to nearest, ties to even)
+ * function for the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23
+#define typeWidth 32
+
+       .syntax unified
+       .text
+  .thumb
+  .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+  push {r4, r5, r6, r7, lr}
+  // Get the absolute value of a and b.
+  lsls r2, r0, #1
+  lsls r3, r1, #1
+  lsrs r2, r2, #1  /* aAbs */
+  beq  LOCAL_LABEL(a_zero_nan_inf)
+  lsrs r3, r3, #1  /* bAbs */
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Detect if a or b is infinity or Nan.
+  lsrs r6, r2, #(significandBits)
+  lsrs r7, r3, #(significandBits)
+  cmp  r6, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+  cmp  r7, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Swap Rep and Abs so that a and aAbs has the larger absolute value.
+  cmp r2, r3
+  bhs LOCAL_LABEL(no_swap)
+  movs r4, r0
+  movs r5, r2
+  movs r0, r1
+  movs r2, r3
+  movs r1, r4
+  movs r3, r5
+LOCAL_LABEL(no_swap):
+
+  // Get the significands and shift them to give us round, guard and sticky.
+  lsls r4, r0, #(typeWidth - significandBits)
+  lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+  lsls r5, r1, #(typeWidth - significandBits)
+  lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+  // Get the implicitBit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3)
+
+  // Get aExponent and set implicit bit if necessary.
+  lsrs r2, r2, #(significandBits)
+  beq LOCAL_LABEL(a_done_implicit_bit)
+  orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+  // Get bExponent and set implicit bit if necessary.
+  lsrs r3, r3, #(significandBits)
+  beq LOCAL_LABEL(b_done_implicit_bit)
+  orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+  // Get the difference in exponents.
+  subs r6, r2, r3
+  beq LOCAL_LABEL(done_align)
+
+  // If b is denormal, then a must be normal as align > 0, and we only need to
+  // right shift bSignificand by (align - 1) bits.
+  cmp  r3, #0
+  bne  1f
+  subs r6, r6, #1
+1:
+
+  // No longer needs bExponent. r3 is dead here.
+  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+  movs r3, #(typeWidth)
+  subs r3, r3, r6
+  movs r7, r5
+  lsls r7, r3
+  beq 1f
+  movs r7, #1
+1:
+
+  // bSignificand = bSignificand >> align | sticky;
+  lsrs r5, r6
+  orrs r5, r7
+  bne LOCAL_LABEL(done_align)
+  movs r5, #1 //  sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+  // isSubtraction = (aRep ^ bRep) >> 31;
+  movs r7, r0
+  eors r7, r1
+  lsrs r7, #31
+  bne LOCAL_LABEL(do_substraction)
+
+  // Same sign, do Addition.
+
+  // aSignificand += bSignificand;
+  adds r4, r4, r5
+
+  // Check carry bit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3 + 1)
+  movs r7, r4
+  ands r7, r6
+  beq LOCAL_LABEL(form_result)
+  // If the addition carried up, we need to right-shift the result and
+  // adjust the exponent.
+  movs r7, r4
+  movs r6, #1
+  ands r7, r6 // sticky = aSignificand & 1;
+  lsrs r4, #1
+  orrs r4, r7  // result Significand
+  adds r2, #1  // result Exponent
+  // If we have overflowed the type, return +/- infinity.
+  cmp  r2, 0xFF
+  beq  LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+  // Shift the sign, exponent and significand into place.
+  lsrs r0, #(typeWidth - 1)
+  lsls r0, #(typeWidth - 1) // Get Sign.
+  lsls r2, #(significandBits)
+  orrs r0, r2
+  movs r1, r4
+  lsls r4, #(typeWidth - significandBits - 3)
+  lsrs r4, #(typeWidth - significandBits)
+  orrs r0, r4
+
+  // Final rounding.  The result may overflow to infinity, but that is the
+  // correct result in that case.
+  // roundGuardSticky = aSignificand & 0x7;
+  movs r2, #0x7
+  ands r1, r2
+  // if (roundGuardSticky > 0x4) result++;
+
+  cmp r1, #0x4
+  blt LOCAL_LABEL(done_round)
+  beq 1f
+  adds r0, #1
+  pop {r4, r5, r6, r7, pc}
+1:
+
+  // if (roundGuardSticky == 0x4) result += result & 1;
+  movs r1, r0
+  lsrs r1, #1
+  bcc  LOCAL_LABEL(done_round)
+  adds r0, r0, #1
+LOCAL_LABEL(done_round):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_substraction):
+  subs r4, r4, r5 // aSignificand -= bSignificand;
+  beq  LOCAL_LABEL(ret_zero)
+  movs r6, r4
+  cmp  r2, 0
+  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+  // If partial cancellation occured, we need to left-shift the result
+  // and adjust the exponent:
+  lsrs r6, r6, #(significandBits + 3)
+  bne LOCAL_LABEL(form_result)
+
+  push {r0, r1, r2, r3}
+  movs r0, r4
+  bl   __clzsi2
+  movs r5, r0
+  pop {r0, r1, r2, r3}
+  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+  // aSignificand <<= shift; aExponent -= shift;
+  lsls r4, r5
+  subs  r2, r2, r5
+  bgt LOCAL_LABEL(form_result)
+
+  // Do normalization if aExponent <= 0.
+  movs r6, #1
+  subs r6, r6, r2 // 1 - aExponent;
+  movs r2, #0 // aExponent = 0;
+  movs r3, #(typeWidth) // bExponent is dead.
+  subs r3, r3, r6
+  movs r7, r4
+  lsls r7, r3  // stickyBit = (bool)(aSignificant << (typeWidth - align))
+  beq 1f
+  movs r7, #1
+1:
+  lsrs r4, r6 /* aSignificand >> shift */
+  orrs r4, r7
+  b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+  movs r0, #0
+  pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+  lsrs r3, r3, #1
+
+LOCAL_LABEL(zero_nan_inf):
+  // Here  r2 has aAbs, r3 has bAbs
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits) // Make +inf.
+
+  cmp r2, r4
+  bhi LOCAL_LABEL(a_is_nan)
+  cmp r3, r4
+  bhi LOCAL_LABEL(b_is_nan)
+
+  cmp r2, r4
+  bne LOCAL_LABEL(a_is_rational)
+  // aAbs is INF.
+  eors r1, r0 // aRep ^ bRep.
+  movs r6, #1
+  lsls r6, r6, #(typeWidth - 1) // get sign mask.
+  cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
+  beq LOCAL_LABEL(a_is_nan)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(a_is_rational):
+  cmp r3, r4
+  bne LOCAL_LABEL(b_is_rational)
+  movs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+  // either a or b or both are zero.
+  adds r4, r2, r3
+  beq  LOCAL_LABEL(both_zero)
+  cmp r2, #0 // is absA 0 ?
+  beq LOCAL_LABEL(ret_b)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(both_zero):
+  ands r0, r1 // +0 + -0 = +0
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+  movs r0, r1
+
+LOCAL_LABEL(ret):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+  movs r0, r1
+LOCAL_LABEL(a_is_nan):
+  movs r1, #1
+  lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
+  orrs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits)
+  orrs r0, r4
+  lsrs r0, r0, #(significandBits)
+  lsls r0, r0, #(significandBits)
+  pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/addsf3vfp.S b/lib/libcompiler_rt/arm/addsf3vfp.S

index af40c1c..8871efd 100644 (file)
--- a/lib/libcompiler_rt/arm/addsf3vfp.S
+++ b/lib/libcompiler_rt/arm/addsf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vadd.f32 s0, s0, s1
+#else
         vmov    s14, r0         // move first param from r0 into float register
         vmov    s15, r1         // move second param from r1 into float register
         vadd.f32 s14, s14, s15
         vmov    r0, s14         // move result back to r0
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__addsf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmp.S b/lib/libcompiler_rt/arm/aeabi_cdcmp.S

index 8008f5f..3e7a8b8 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_cdcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_cdcmp.S
@@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
          push {r0-r3, lr}
          bl __aeabi_cdcmpeq_check_nan
          cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cdcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
          pop {r0-r3, lr}
  
          // NaN has been ruled out, so __aeabi_cdcmple can't trap
          bne __aeabi_cdcmple
  
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
          msr CPSR_f, #APSR_C
+#endif
          JMP(lr)
+#endif
  END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
  
  
@@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
  
          bl __aeabi_dcmplt
          cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_dcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
          moveq ip, #0
          beq 1f
  
          ldm sp, {r0-r3}
          bl __aeabi_dcmpeq
          cmp r0, #1
+        ITE(eq)
          moveq ip, #(APSR_C | APSR_Z)
          movne ip, #(APSR_C)
  
  1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
          msr CPSR_f, ip
+#endif
          pop {r0-r3}
          POP_PC()
+#endif
  END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
  
  // int __aeabi_cdrcmple(double a, double b) {
diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c

index 577f6b2..7578433 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c
+++ b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c
@@ -8,9 +8,9 @@
  //===----------------------------------------------------------------------===//
  
  #include <stdint.h>
+#include "../int_lib.h"
  
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
  int __aeabi_cdcmpeq_check_nan(double a, double b) {
      return __builtin_isnan(a) || __builtin_isnan(b);
  }
diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmp.S b/lib/libcompiler_rt/arm/aeabi_cfcmp.S

index 274baf7..1f304ff 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_cfcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_cfcmp.S
@@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
          push {r0-r3, lr}
          bl __aeabi_cfcmpeq_check_nan
          cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cfcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
          pop {r0-r3, lr}
  
          // NaN has been ruled out, so __aeabi_cfcmple can't trap
          bne __aeabi_cfcmple
  
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
          msr CPSR_f, #APSR_C
+#endif
          JMP(lr)
+#endif
  END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
  
  
@@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
  
          bl __aeabi_fcmplt
          cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_fcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
          moveq ip, #0
          beq 1f
  
          ldm sp, {r0-r3}
          bl __aeabi_fcmpeq
          cmp r0, #1
+        ITE(eq)
          moveq ip, #(APSR_C | APSR_Z)
          movne ip, #(APSR_C)
  
  1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
          msr CPSR_f, ip
+#endif
          pop {r0-r3}
          POP_PC()
+#endif
  END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
  
  // int __aeabi_cfrcmple(float a, float b) {
diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c

index 992e31f..43dde9a 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c
+++ b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c
@@ -8,9 +8,9 @@
  //===----------------------------------------------------------------------===//
  
  #include <stdint.h>
+#include "../int_lib.h"
  
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
  int __aeabi_cfcmpeq_check_nan(float a, float b) {
      return __builtin_isnan(a) || __builtin_isnan(b);
  }
diff --git a/lib/libcompiler_rt/arm/aeabi_dcmp.S b/lib/libcompiler_rt/arm/aeabi_dcmp.S

index 43e4392..9fa78b4 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_dcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_dcmp.S
@@ -18,18 +18,27 @@
  //   }
  // }
  
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS                    \
+        vmov      d0, r0, r1                     SEPARATOR \
+        vmov      d1, r2, r3
+#else
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
  #define DEFINE_AEABI_DCMP(cond)                            \
          .syntax unified                          SEPARATOR \
          .p2align 2                               SEPARATOR \
  DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
          push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
          bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
          cmp       r0, #0                         SEPARATOR \
          b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
          pop       { r4, pc }                     SEPARATOR \
  1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
          pop       { r4, pc }                     SEPARATOR \
  END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
  
diff --git a/lib/libcompiler_rt/arm/aeabi_div0.c b/lib/libcompiler_rt/arm/aeabi_div0.c

index ccc95fa..dc30313 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_div0.c
+++ b/lib/libcompiler_rt/arm/aeabi_div0.c
@@ -26,16 +26,18 @@
   * line.
   */
  
+#include "../int_lib.h"
+
  /* provide an unused declaration to pacify pendantic compilation */
  extern unsigned char declaration;
  
  #if defined(__ARM_EABI__)
-int __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
  __aeabi_idiv0(int return_value) {
    return return_value;
  }
  
-long long __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
  __aeabi_ldiv0(long long return_value) {
    return return_value;
  }
diff --git a/lib/libcompiler_rt/arm/aeabi_drsub.c b/lib/libcompiler_rt/arm/aeabi_drsub.c

index fc17d5a..1254886 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_drsub.c
+++ b/lib/libcompiler_rt/arm/aeabi_drsub.c
@@ -10,10 +10,10 @@
  #define DOUBLE_PRECISION
  #include "../fp_lib.h"
  
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
  __aeabi_dsub(fp_t, fp_t);
  
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
  __aeabi_drsub(fp_t a, fp_t b) {
      return __aeabi_dsub(b, a);
  }
diff --git a/lib/libcompiler_rt/arm/aeabi_fcmp.S b/lib/libcompiler_rt/arm/aeabi_fcmp.S

index 0a1d92a..ea5b96c 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_fcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_fcmp.S
@@ -18,18 +18,27 @@
  //   }
  // }
  
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS                    \
+        vmov      s0, r0                         SEPARATOR \
+        vmov      s1, r1
+#else
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
  #define DEFINE_AEABI_FCMP(cond)                            \
          .syntax unified                          SEPARATOR \
          .p2align 2                               SEPARATOR \
  DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
          push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
          bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
          cmp       r0, #0                         SEPARATOR \
          b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
          pop       { r4, pc }                     SEPARATOR \
  1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
          pop       { r4, pc }                     SEPARATOR \
  END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
  
diff --git a/lib/libcompiler_rt/arm/aeabi_frsub.c b/lib/libcompiler_rt/arm/aeabi_frsub.c

index 64258dc..34f2303 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_frsub.c
+++ b/lib/libcompiler_rt/arm/aeabi_frsub.c
@@ -10,10 +10,10 @@
  #define SINGLE_PRECISION
  #include "../fp_lib.h"
  
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
  __aeabi_fsub(fp_t, fp_t);
  
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
  __aeabi_frsub(fp_t a, fp_t b) {
      return __aeabi_fsub(b, a);
  }
diff --git a/lib/libcompiler_rt/arm/aeabi_idivmod.S b/lib/libcompiler_rt/arm/aeabi_idivmod.S

index 2fcad86..0164b15 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_idivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_idivmod.S
@@ -15,16 +15,34 @@
  //   return {quot, rem};
  // }
  
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
+
          .syntax unified
          .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__divsi3)
+        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+        muls    r2, r0, r2   // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+#else
          push    { lr }
          sub     sp, sp, #4
          mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0
+        mov     r0, r1
+        mov     r1, r3
+#endif
          bl      SYMBOL_NAME(__divmodsi4)
          ldr     r1, [sp]
          add     sp, sp, #4
          pop     { pc }
+#endif // __ARM_ARCH_ISA_THUMB == 1
  END_COMPILERRT_FUNCTION(__aeabi_idivmod)
  
  NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_ldivmod.S b/lib/libcompiler_rt/arm/aeabi_ldivmod.S

index 9f161f3..038ae5d 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_ldivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_ldivmod.S
@@ -16,18 +16,30 @@
  //   return {quot, rem};
  // }
  
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+
          .syntax unified
          .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
-        push    {r11, lr}
+        push    {r6, lr}
          sub     sp, sp, #16
-        add     r12, sp, #8
-        str     r12, [sp]
+        add     r6, sp, #8
+        str     r6, [sp]
+#if defined(__MINGW32__)
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
          bl      SYMBOL_NAME(__divmoddi4)
          ldr     r2, [sp, #8]
          ldr     r3, [sp, #12]
          add     sp, sp, #16
-        pop     {r11, pc}
+        pop     {r6, pc}
  END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
  
  NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_memset.S b/lib/libcompiler_rt/arm/aeabi_memset.S

index 48edd89..633f592 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_memset.S
+++ b/lib/libcompiler_rt/arm/aeabi_memset.S
@@ -26,7 +26,7 @@ DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
  
  DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
          mov     r2, r1
-        mov     r1, #0
+        movs    r1, #0
          b       memset
  END_COMPILERRT_FUNCTION(__aeabi_memclr)
  
diff --git a/lib/libcompiler_rt/arm/aeabi_uidivmod.S b/lib/libcompiler_rt/arm/aeabi_uidivmod.S

index e1e12d9..a627fc7 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_uidivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_uidivmod.S
@@ -16,16 +16,40 @@
  //   return {quot, rem};
  // }
  
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
          .syntax unified
          .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger)
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__aeabi_uidiv)
+        pop     {r1, r2, r3}
+        muls    r2, r0, r2 // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+LOCAL_LABEL(case_denom_larger):
+        movs    r1, r0
+        movs    r0, #0
+        JMP     (lr)
+#else
          push    { lr }
          sub     sp, sp, #4
          mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0
+        mov     r0, r1
+        mov     r1, r3
+#endif
          bl      SYMBOL_NAME(__udivmodsi4)
          ldr     r1, [sp]
          add     sp, sp, #4
          pop     { pc }
+#endif
  END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
  
  NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_uldivmod.S b/lib/libcompiler_rt/arm/aeabi_uldivmod.S

index e8aaef2..be343b6 100644 (file)
--- a/lib/libcompiler_rt/arm/aeabi_uldivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_uldivmod.S
@@ -16,18 +16,30 @@
  //   return {quot, rem};
  // }
  
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
          .syntax unified
          .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
-        push   {r11, lr}
+        push   {r6, lr}
          sub    sp, sp, #16
-        add    r12, sp, #8
-        str    r12, [sp]
+        add    r6, sp, #8
+        str    r6, [sp]
+#if defined(__MINGW32__)
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
          bl     SYMBOL_NAME(__udivmoddi4)
          ldr    r2, [sp, #8]
          ldr    r3, [sp, #12]
          add    sp, sp, #16
-        pop    {r11, pc}
+        pop    {r6, pc}
  END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
  
  NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/comparesf2.S b/lib/libcompiler_rt/arm/comparesf2.S

index 52597b6..ef7091b 100644 (file)
--- a/lib/libcompiler_rt/arm/comparesf2.S
+++ b/lib/libcompiler_rt/arm/comparesf2.S
@@ -39,32 +39,64 @@
  
  #include "../assembly.h"
  .syntax unified
+#if __ARM_ARCH_ISA_THUMB == 2
+.thumb
+#endif
  
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
      // Make copies of a and b with the sign bit shifted off the top.  These will
      // be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+#else
      mov     r2,         r0, lsl #1
      mov     r3,         r1, lsl #1
+#endif
  
      // We do the comparison in three stages (ignoring NaN values for the time
      // being).  First, we orr the absolute values of a and b; this sets the Z
      // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
      // effect this at all, but it *does* make sure that the C flag is clear for
      // the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+    lsrs    r6,     r3, #1
+    orrs    r6,     r2
+#else
      orrs    r12,    r2, r3, lsr #1
-
+#endif
      // Next, we check if a and b have the same or different signs.  If they have
      // opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+    beq     1f
+    movs    r6,     r0
+    eors    r6,     r1
+1:
+#else
      it ne
      eorsne  r12,    r0, r1
+#endif
  
      // If a and b are equal (either both zeros or bit identical; again, we're
      // ignoring NaNs for now), this subtract will zero out r0.  If they have the
      // same sign, the flags are updated as they would be for a comparison of the
      // absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bmi     1f
+    subs    r0,     r2, r3
+1:
+#else
      it pl
      subspl  r0,     r2, r3
+#endif
  
      // If a is smaller in magnitude than b and both have the same sign, place
      // the negation of the sign of b in r0.  Thus, if both are negative and
@@ -76,41 +108,126 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2)
      // still clear from the shift argument in orrs; if a is positive and b
      // negative, this places 0 in r0; if a is negative and b positive, -1 is
      // placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bhs     1f
+    // Here if a and b have the same sign and absA < absB, the result is thus
+    // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
+    movs    r0,         #1
+    lsrs    r1,         #31
+    bne     LOCAL_LABEL(CHECK_NAN)
+    negs    r0,         r0
+    b       LOCAL_LABEL(CHECK_NAN)
+1:
+#else
      it lo
      mvnlo   r0,         r1, asr #31
+#endif
  
      // If a is greater in magnitude than b and both have the same sign, place
      // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
      // in r0, which is the desired result.  Conversely, if both are positive
      // and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bls     1f
+    // Here both have the same sign and absA > absB.
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN)
+    negs    r0, r0
+1:
+#else
      it hi
      movhi   r0,         r1, asr #31
+#endif
  
      // If you've been keeping track, at this point r0 contains -1 if a < b and
      // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
      // If a == b, then the Z flag is set, so we can get the correct final value
      // into r0 by simply or'ing with 1 if Z is clear.
+    // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
      it ne
      orrne   r0,     r0, #1
+#endif
  
      // Finally, we need to deal with NaNs.  If either argument is NaN, replace
      // the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     1f
+    cmp     r3,         r6
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+    pop     {r6, pc}
+#else
      cmp     r2,         #0xff000000
      ite ls
      cmpls   r3,         #0xff000000
      movhi   r0,         #1
      JMP(lr)
+#endif
  END_COMPILERRT_FUNCTION(__eqsf2)
+
  DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
  DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
  DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
  
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__gtsf2)
      // Identical to the preceding except in that we return -1 for NaN values.
-    // Given that the two paths share so much code, one might be tempted to 
+    // Given that the two paths share so much code, one might be tempted to
      // unify them; however, the extra code needed to do so makes the code size
      // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,        r0, #1
+    lsls    r3,        r1, #1
+    lsrs    r6,        r3, #1
+    orrs    r6,        r2
+    beq     1f
+    movs    r6,        r0
+    eors    r6,        r1
+1:
+    bmi     2f
+    subs    r0,        r2, r3
+2:
+    bhs     3f
+    movs    r0,        #1
+    lsrs    r1,        #31
+    bne     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+    b       LOCAL_LABEL(CHECK_NAN_2)
+3:
+    bls     4f
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     5f
+    cmp     r3,         r6
+5:
+    bls     6f
+    movs    r0,         #1
+    negs    r0,         r0
+6:
+    pop     {r6, pc}
+#else
      mov     r2,         r0, lsl #1
      mov     r3,         r1, lsl #1
      orrs    r12,    r2, r3, lsr #1
@@ -129,23 +246,51 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
      cmpls   r3,         #0xff000000
      movhi   r0,         #-1
      JMP(lr)
+#endif
  END_COMPILERRT_FUNCTION(__gtsf2)
+
  DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
  
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+    .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov    r0,         s0
+    vmov    r1,         s1
+#endif
      // Return 1 for NaN values, 0 otherwise.
-    mov     r2,         r0, lsl #1
-    mov     r3,         r1, lsl #1
-    mov     r0,         #0
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+    movs    r0,         #0
+#if __ARM_ARCH_ISA_THUMB == 1
+    movs    r1,         #0xff
+    lsls    r1,         #24
+    cmp     r2,         r1
+    bhi     1f
+    cmp     r3,         r1
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+#else
      cmp     r2,         #0xff000000
      ite ls
      cmpls   r3,         #0xff000000
      movhi   r0,         #1
+#endif
      JMP(lr)
  END_COMPILERRT_FUNCTION(__unordsf2)
  
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+       vmov s0, r0
+       vmov s1, r1
+       b SYMBOL_NAME(__unordsf2)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+#else
  DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
  
  NO_EXEC_STACK_DIRECTIVE
  
diff --git a/lib/libcompiler_rt/arm/divdf3vfp.S b/lib/libcompiler_rt/arm/divdf3vfp.S

index 928f538..776ba4f 100644 (file)
--- a/lib/libcompiler_rt/arm/divdf3vfp.S
+++ b/lib/libcompiler_rt/arm/divdf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vdiv.f64 d0, d0, d1
+#else
         vmov    d6, r0, r1              // move first param from r0/r1 pair into d6
         vmov    d7, r2, r3              // move second param from r2/r3 pair into d7
-       vdiv.f64 d5, d6, d7             
+       vdiv.f64 d5, d6, d7
         vmov    r0, r1, d5              // move result back to r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__divdf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/divsf3vfp.S b/lib/libcompiler_rt/arm/divsf3vfp.S

index a2e297f..130318f 100644 (file)
--- a/lib/libcompiler_rt/arm/divsf3vfp.S
+++ b/lib/libcompiler_rt/arm/divsf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vdiv.f32 s0, s0, s1
+#else
         vmov    s14, r0         // move first param from r0 into float register
         vmov    s15, r1         // move second param from r1 into float register
         vdiv.f32 s13, s14, s15
         vmov    r0, s13         // move result back to r0
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__divsf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/divsi3.S b/lib/libcompiler_rt/arm/divsi3.S

index 7e23ba4..f066f60 100644 (file)
--- a/lib/libcompiler_rt/arm/divsi3.S
+++ b/lib/libcompiler_rt/arm/divsi3.S
@@ -49,17 +49,37 @@ LOCAL_LABEL(divzero):
  #else
  ESTABLISH_FRAME
  //  Set aside the sign of the quotient.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    movs    r4,     r0
+    eors    r4,     r1
+#  else
      eor     r4,     r0, r1
+#  endif
  //  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+#  if   __ARM_ARCH_ISA_THUMB == 1
+    asrs    r2,     r0, #31
+    asrs    r3,     r1, #31
+    eors    r0,     r2
+    eors    r1,     r3
+    subs    r0,     r0, r2
+    subs    r1,     r1, r3
+#  else
      eor     r2,     r0, r0, asr #31
      eor     r3,     r1, r1, asr #31
      sub     r0,     r2, r0, asr #31
      sub     r1,     r3, r1, asr #31
+#  endif
  //  abs(a) / abs(b)
      bl      SYMBOL_NAME(__udivsi3)
  //  Apply sign of quotient to result and return.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    asrs    r4,     #31
+    eors    r0,     r4
+    subs    r0,     r0, r4
+#  else
      eor     r0,     r0, r4, asr #31
      sub     r0,     r0, r4, asr #31
+#  endif
      CLEAR_FRAME_AND_RETURN
  #endif
  END_COMPILERRT_FUNCTION(__divsi3)
diff --git a/lib/libcompiler_rt/arm/eqdf2vfp.S b/lib/libcompiler_rt/arm/eqdf2vfp.S

index 95e6bb3..d507065 100644 (file)
--- a/lib/libcompiler_rt/arm/eqdf2vfp.S
+++ b/lib/libcompiler_rt/arm/eqdf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7         
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(eq)
         moveq   r0, #1          // set result register to 1 if equal
         movne   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/eqsf2vfp.S b/lib/libcompiler_rt/arm/eqsf2vfp.S

index fbac139..fd72b2f 100644 (file)
--- a/lib/libcompiler_rt/arm/eqsf2vfp.S
+++ b/lib/libcompiler_rt/arm/eqsf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(eq)
         moveq   r0, #1      // set result register to 1 if equal
         movne   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/extendsfdf2vfp.S b/lib/libcompiler_rt/arm/extendsfdf2vfp.S

index 563bf92..1079f97 100644 (file)
--- a/lib/libcompiler_rt/arm/extendsfdf2vfp.S
+++ b/lib/libcompiler_rt/arm/extendsfdf2vfp.S
@@ -19,9 +19,13 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.f64.f32 d0, s0
+#else
         vmov    s15, r0      // load float register from R0
         vcvt.f64.f32 d7, s15 // convert single to double
         vmov    r0, r1, d7   // return result in r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
  
diff --git a/lib/libcompiler_rt/arm/fixdfsivfp.S b/lib/libcompiler_rt/arm/fixdfsivfp.S

index 8263ff9..5d7b0f8 100644 (file)
--- a/lib/libcompiler_rt/arm/fixdfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixdfsivfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.s32.f64 s0, d0
+       vmov r0, s0
+#else
         vmov    d7, r0, r1    // load double register from R0/R1
         vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
         vmov    r0, s15       // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__fixdfsivfp)
  
diff --git a/lib/libcompiler_rt/arm/fixsfsivfp.S b/lib/libcompiler_rt/arm/fixsfsivfp.S

index c7c3b81..805a277 100644 (file)
--- a/lib/libcompiler_rt/arm/fixsfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixsfsivfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.s32.f32 s0, s0
+       vmov r0, s0
+#else
         vmov    s15, r0        // load float register from R0
         vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
         vmov    r0, s15        // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__fixsfsivfp)
  
diff --git a/lib/libcompiler_rt/arm/fixunsdfsivfp.S b/lib/libcompiler_rt/arm/fixunsdfsivfp.S

index 9cc1e62..4f1b2c8 100644 (file)
--- a/lib/libcompiler_rt/arm/fixunsdfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixunsdfsivfp.S
@@ -20,9 +20,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.u32.f64 s0, d0
+       vmov r0, s0
+#else
         vmov    d7, r0, r1    // load double register from R0/R1
         vcvt.u32.f64 s15, d7  // convert double to 32-bit int into s15
         vmov    r0, s15       // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
  
diff --git a/lib/libcompiler_rt/arm/fixunssfsivfp.S b/lib/libcompiler_rt/arm/fixunssfsivfp.S

index 79d7082..e5d7782 100644 (file)
--- a/lib/libcompiler_rt/arm/fixunssfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixunssfsivfp.S
@@ -20,9 +20,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.u32.f32 s0, s0
+       vmov r0, s0
+#else
         vmov    s15, r0        // load float register from R0
         vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
         vmov    r0, s15        // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__fixunssfsivfp)
  
diff --git a/lib/libcompiler_rt/arm/floatsidfvfp.S b/lib/libcompiler_rt/arm/floatsidfvfp.S

index 7623f26..3297ad4 100644 (file)
--- a/lib/libcompiler_rt/arm/floatsidfvfp.S
+++ b/lib/libcompiler_rt/arm/floatsidfvfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmov s0, r0
+       vcvt.f64.s32 d0, s0
+#else
         vmov    s15, r0        // move int to float register s15
         vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
         vmov    r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__floatsidfvfp)
  
diff --git a/lib/libcompiler_rt/arm/floatsisfvfp.S b/lib/libcompiler_rt/arm/floatsisfvfp.S

index c73dfac..65408b5 100644 (file)
--- a/lib/libcompiler_rt/arm/floatsisfvfp.S
+++ b/lib/libcompiler_rt/arm/floatsisfvfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmov s0, r0
+       vcvt.f32.s32 s0, s0
+#else
         vmov    s15, r0        // move int to float register s15
         vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
         vmov    r0, s15        // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__floatsisfvfp)
  
diff --git a/lib/libcompiler_rt/arm/floatunssidfvfp.S b/lib/libcompiler_rt/arm/floatunssidfvfp.S

index 2a59fdb..d7a7024 100644 (file)
--- a/lib/libcompiler_rt/arm/floatunssidfvfp.S
+++ b/lib/libcompiler_rt/arm/floatunssidfvfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmov s0, r0
+       vcvt.f64.u32 d0, s0
+#else
         vmov    s15, r0        // move int to float register s15
         vcvt.f64.u32 d7, s15   // convert 32-bit int in s15 to double in d7
         vmov    r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__floatunssidfvfp)
  
diff --git a/lib/libcompiler_rt/arm/floatunssisfvfp.S b/lib/libcompiler_rt/arm/floatunssisfvfp.S

index c096263..1ca8565 100644 (file)
--- a/lib/libcompiler_rt/arm/floatunssisfvfp.S
+++ b/lib/libcompiler_rt/arm/floatunssisfvfp.S
@@ -19,9 +19,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmov s0, r0
+       vcvt.f32.u32 s0, s0
+#else
         vmov    s15, r0        // move int to float register s15
         vcvt.f32.u32 s15, s15  // convert 32-bit int in s15 to float in s15
         vmov    r0, s15        // move s15 to result register
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__floatunssisfvfp)
  
diff --git a/lib/libcompiler_rt/arm/gedf2vfp.S b/lib/libcompiler_rt/arm/gedf2vfp.S

index 72f13ef..364fc5b 100644 (file)
--- a/lib/libcompiler_rt/arm/gedf2vfp.S
+++ b/lib/libcompiler_rt/arm/gedf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ge)
         movge   r0, #1      // set result register to 1 if greater than or equal
         movlt   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/gesf2vfp.S b/lib/libcompiler_rt/arm/gesf2vfp.S

index c9ee52c..346c347 100644 (file)
--- a/lib/libcompiler_rt/arm/gesf2vfp.S
+++ b/lib/libcompiler_rt/arm/gesf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ge)
         movge   r0, #1      // set result register to 1 if greater than or equal
         movlt   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/gtdf2vfp.S b/lib/libcompiler_rt/arm/gtdf2vfp.S

index c7f2775..3389c3a 100644 (file)
--- a/lib/libcompiler_rt/arm/gtdf2vfp.S
+++ b/lib/libcompiler_rt/arm/gtdf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(gt)
         movgt   r0, #1          // set result register to 1 if equal
         movle   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/gtsf2vfp.S b/lib/libcompiler_rt/arm/gtsf2vfp.S

index 7d49e45..afdba8b 100644 (file)
--- a/lib/libcompiler_rt/arm/gtsf2vfp.S
+++ b/lib/libcompiler_rt/arm/gtsf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0         // move from GPR 0 to float register
         vmov    s15, r1         // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(gt)
         movgt   r0, #1          // set result register to 1 if equal
         movle   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/ledf2vfp.S b/lib/libcompiler_rt/arm/ledf2vfp.S

index ca5b553..4bbe4c8 100644 (file)
--- a/lib/libcompiler_rt/arm/ledf2vfp.S
+++ b/lib/libcompiler_rt/arm/ledf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ls)
         movls   r0, #1          // set result register to 1 if equal
         movhi   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/lesf2vfp.S b/lib/libcompiler_rt/arm/lesf2vfp.S

index f25422e..51232bd 100644 (file)
--- a/lib/libcompiler_rt/arm/lesf2vfp.S
+++ b/lib/libcompiler_rt/arm/lesf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ls)
         movls   r0, #1      // set result register to 1 if equal
         movhi   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/ltdf2vfp.S b/lib/libcompiler_rt/arm/ltdf2vfp.S

index 6e2c099..8e2928c 100644 (file)
--- a/lib/libcompiler_rt/arm/ltdf2vfp.S
+++ b/lib/libcompiler_rt/arm/ltdf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(mi)
         movmi   r0, #1          // set result register to 1 if equal
         movpl   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/ltsf2vfp.S b/lib/libcompiler_rt/arm/ltsf2vfp.S

index 95febb6..59c00c6 100644 (file)
--- a/lib/libcompiler_rt/arm/ltsf2vfp.S
+++ b/lib/libcompiler_rt/arm/ltsf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(mi)
         movmi   r0, #1      // set result register to 1 if equal
         movpl   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/muldf3vfp.S b/lib/libcompiler_rt/arm/muldf3vfp.S

index f638de1..aa7b234 100644 (file)
--- a/lib/libcompiler_rt/arm/muldf3vfp.S
+++ b/lib/libcompiler_rt/arm/muldf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmul.f64 d0, d0, d1
+#else
         vmov    d6, r0, r1         // move first param from r0/r1 pair into d6
         vmov    d7, r2, r3         // move second param from r2/r3 pair into d7
-       vmul.f64 d6, d6, d7             
+       vmul.f64 d6, d6, d7
         vmov    r0, r1, d6         // move result back to r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__muldf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/mulsf3vfp.S b/lib/libcompiler_rt/arm/mulsf3vfp.S

index bef58d3..a1da789 100644 (file)
--- a/lib/libcompiler_rt/arm/mulsf3vfp.S
+++ b/lib/libcompiler_rt/arm/mulsf3vfp.S
@@ -18,9 +18,13 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vmul.f32 s0, s0, s1
+#else
         vmov    s14, r0         // move first param from r0 into float register
         vmov    s15, r1         // move second param from r1 into float register
         vmul.f32 s13, s14, s15
+#endif
         vmov    r0, s13         // move result back to r0
         bx      lr
  END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/lib/libcompiler_rt/arm/nedf2vfp.S b/lib/libcompiler_rt/arm/nedf2vfp.S

index 78cf529..aef72eb 100644 (file)
--- a/lib/libcompiler_rt/arm/nedf2vfp.S
+++ b/lib/libcompiler_rt/arm/nedf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
         vcmp.f64 d6, d7         
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ne)
         movne   r0, #1          // set result register to 0 if unequal
         moveq   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/negdf2vfp.S b/lib/libcompiler_rt/arm/negdf2vfp.S

index 01c8ba6..81f0ab8 100644 (file)
--- a/lib/libcompiler_rt/arm/negdf2vfp.S
+++ b/lib/libcompiler_rt/arm/negdf2vfp.S
@@ -18,7 +18,11 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vneg.f64 d0, d0
+#else
         eor     r1, r1, #-2147483648    // flip sign bit on double in r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__negdf2vfp)
  
diff --git a/lib/libcompiler_rt/arm/negsf2vfp.S b/lib/libcompiler_rt/arm/negsf2vfp.S

index 797abb3..46ab4a9 100644 (file)
--- a/lib/libcompiler_rt/arm/negsf2vfp.S
+++ b/lib/libcompiler_rt/arm/negsf2vfp.S
@@ -18,7 +18,11 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vneg.f32 s0, s0
+#else
         eor     r0, r0, #-2147483648    // flip sign bit on float in r0
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__negsf2vfp)
  
diff --git a/lib/libcompiler_rt/arm/nesf2vfp.S b/lib/libcompiler_rt/arm/nesf2vfp.S

index 554d3e4..50d60f4 100644 (file)
--- a/lib/libcompiler_rt/arm/nesf2vfp.S
+++ b/lib/libcompiler_rt/arm/nesf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(ne)
         movne   r0, #1      // set result register to 1 if unequal
         moveq   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/subdf3vfp.S b/lib/libcompiler_rt/arm/subdf3vfp.S

index 1fc7d18..2b6f2bd 100644 (file)
--- a/lib/libcompiler_rt/arm/subdf3vfp.S
+++ b/lib/libcompiler_rt/arm/subdf3vfp.S
@@ -18,10 +18,14 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vsub.f64 d0, d0, d1
+#else
         vmov    d6, r0, r1         // move first param from r0/r1 pair into d6
         vmov    d7, r2, r3         // move second param from r2/r3 pair into d7
         vsub.f64 d6, d6, d7             
         vmov    r0, r1, d6         // move result back to r0/r1 pair
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__subdf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/subsf3vfp.S b/lib/libcompiler_rt/arm/subsf3vfp.S

index 11fe386..3e83ea2 100644 (file)
--- a/lib/libcompiler_rt/arm/subsf3vfp.S
+++ b/lib/libcompiler_rt/arm/subsf3vfp.S
@@ -12,17 +12,21 @@
  //
  // extern float __subsf3vfp(float a, float b);
  //
-// Returns the difference between two single precision floating point numbers 
+// Returns the difference between two single precision floating point numbers
  // using the Darwin calling convention where single arguments are passsed
  // like 32-bit ints.
  //
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vsub.f32 s0, s0, s1
+#else
         vmov    s14, r0         // move first param from r0 into float register
         vmov    s15, r1         // move second param from r1 into float register
         vsub.f32 s14, s14, s15
         vmov    r0, s14         // move result back to r0
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__subsf3vfp)
  
diff --git a/lib/libcompiler_rt/arm/truncdfsf2vfp.S b/lib/libcompiler_rt/arm/truncdfsf2vfp.S

index 04287ad..682e54d 100644 (file)
--- a/lib/libcompiler_rt/arm/truncdfsf2vfp.S
+++ b/lib/libcompiler_rt/arm/truncdfsf2vfp.S
@@ -19,9 +19,13 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcvt.f32.f64 s0, d0
+#else
         vmov    d7, r0, r1   // load double from r0/r1 pair
         vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
         vmov    r0, s15      // return result in r0
+#endif
         bx      lr
  END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
  
diff --git a/lib/libcompiler_rt/arm/udivsi3.S b/lib/libcompiler_rt/arm/udivsi3.S

index 085f8fb..b97b308 100644 (file)
--- a/lib/libcompiler_rt/arm/udivsi3.S
+++ b/lib/libcompiler_rt/arm/udivsi3.S
@@ -37,15 +37,38 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
         beq     LOCAL_LABEL(divby0)
         udiv    r0, r0, r1
         bx      lr
-#else
+
+LOCAL_LABEL(divby0):
+       mov     r0, #0
+#  ifdef __ARM_EABI__
+       b       __aeabi_idiv0
+#  else
+       JMP(lr)
+#  endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
         cmp     r1, #1
         bcc     LOCAL_LABEL(divby0)
+#if __ARM_ARCH_ISA_THUMB == 1
+       bne LOCAL_LABEL(num_neq_denom)
+       JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
         IT(eq)
         JMPc(lr, eq)
+#endif
         cmp     r0, r1
+#if __ARM_ARCH_ISA_THUMB == 1
+       bhs LOCAL_LABEL(num_ge_denom)
+       movs r0, #0
+       JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
         ITT(cc)
         movcc   r0, #0
         JMPc(lr, cc)
+#endif
+
         /*
          * Implement division using binary long division algorithm.
          *
@@ -62,7 +85,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
          * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
          */
  
-#  ifdef __ARM_FEATURE_CLZ
+#  if defined(__ARM_FEATURE_CLZ)
         clz     ip, r0
         clz     r3, r1
         /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
@@ -77,49 +100,131 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
         sub     ip, ip, r3, lsl #3
         mov     r3, #0
         bx      ip
-#  else
+#  else /* No CLZ Feature */
  #    if __ARM_ARCH_ISA_THUMB == 2
  #    error THUMB mode requires CLZ or UDIV
  #    endif
+#    if __ARM_ARCH_ISA_THUMB == 1
+#      define BLOCK_SIZE 10
+#    else
+#      define BLOCK_SIZE 12
+#    endif
+
         mov     r2, r0
+#    if __ARM_ARCH_ISA_THUMB == 1
+       mov ip, r0
+       adr r0, LOCAL_LABEL(div0block)
+       adds r0, #1
+#    else
         adr     ip, LOCAL_LABEL(div0block)
-
-       lsr     r3, r2, #16
+#    endif
+       lsrs    r3, r2, #16
         cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_16)
+       movs r2, r3
+       subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+#    else
         movhs   r2, r3
-       subhs   ip, ip, #(16 * 12)
+       subhs   ip, ip, #(16 * BLOCK_SIZE)
+#    endif
  
-       lsr     r3, r2, #8
+       lsrs    r3, r2, #8
         cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_8)
+       movs r2, r3
+       subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+#    else
         movhs   r2, r3
-       subhs   ip, ip, #(8 * 12)
+       subhs   ip, ip, #(8 * BLOCK_SIZE)
+#    endif
  
-       lsr     r3, r2, #4
+       lsrs    r3, r2, #4
         cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_4)
+       movs r2, r3
+       subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+#    else
         movhs   r2, r3
-       subhs   ip, #(4 * 12)
+       subhs   ip, #(4 * BLOCK_SIZE)
+#    endif
  
-       lsr     r3, r2, #2
+       lsrs    r3, r2, #2
         cmp     r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+       blo LOCAL_LABEL(skip_2)
+       movs r2, r3
+       subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+#    else
         movhs   r2, r3
-       subhs   ip, ip, #(2 * 12)
+       subhs   ip, ip, #(2 * BLOCK_SIZE)
+#    endif
  
         /* Last block, no need to update r2 or r3. */
+#    if __ARM_ARCH_ISA_THUMB == 1
+       lsrs r3, r2, #1
+       cmp r3, r1
+       blo LOCAL_LABEL(skip_1)
+       subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+       movs r2, r0
+       mov r0, ip
+       movs r3, #0
+       JMP (r2)
+
+#    else
         cmp     r1, r2, lsr #1
-       subls   ip, ip, #(1 * 12)
+       subls   ip, ip, #(1 * BLOCK_SIZE)
  
-       mov     r3, #0
+       movs    r3, #0
  
         JMP(ip)
-#  endif
+#    endif
+#  endif /* __ARM_FEATURE_CLZ */
+
  
  #define        IMM     #
+       /* due to the range limit of branch in Thumb1, we have to place the
+                block closer */
+LOCAL_LABEL(divby0):
+       movs    r0, #0
+#      if defined(__ARM_EABI__)
+       push {r7, lr}
+       bl      __aeabi_idiv0 // due to relocation limit, can't use b.
+       pop  {r7, pc}
+#      else
+       JMP(lr)
+#      endif
  
+
+#if __ARM_ARCH_ISA_THUMB == 1
+#define block(shift)                                                           \
+       lsls r2, r1, IMM shift;                                                      \
+       cmp r0, r2;                                                                  \
+       blo LOCAL_LABEL(block_skip_##shift);                                         \
+       subs r0, r0, r2;                                                             \
+       LOCAL_LABEL(block_skip_##shift) :;                                           \
+       adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+       /* TODO: if current location counter is not not word aligned, we don't
+                need the .p2align and nop */
+       /* Label div0block must be word-aligned. First align block 31 */
+       .p2align 2
+       nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
  #define block(shift)                                                           \
         cmp     r0, r1, lsl IMM shift;                                         \
         ITT(hs);                                                               \
         WIDE(addhs)     r3, r3, IMM (1 << shift);                              \
         WIDE(subhs)     r0, r0, r1, lsl IMM shift
+#endif
  
         block(31)
         block(30)
@@ -159,14 +264,6 @@ LOCAL_LABEL(div0block):
         JMP(lr)
  #endif /* __ARM_ARCH_EXT_IDIV__ */
  
-LOCAL_LABEL(divby0):
-       mov     r0, #0
-#ifdef __ARM_EABI__
-       b       __aeabi_idiv0
-#else
-       JMP(lr)
-#endif
-
  END_COMPILERRT_FUNCTION(__udivsi3)
  
  NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/unorddf2vfp.S b/lib/libcompiler_rt/arm/unorddf2vfp.S

index 022dd7a..6625fa8 100644 (file)
--- a/lib/libcompiler_rt/arm/unorddf2vfp.S
+++ b/lib/libcompiler_rt/arm/unorddf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f64 d0, d1
+#else
         vmov    d6, r0, r1      // load r0/r1 pair in double register
         vmov    d7, r2, r3      // load r2/r3 pair in double register
-       vcmp.f64 d6, d7         
+       vcmp.f64 d6, d7
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(vs)
         movvs   r0, #1      // set result register to 1 if "overflow" (any NaNs)
         movvc   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm/unordsf2vfp.S b/lib/libcompiler_rt/arm/unordsf2vfp.S

index 5ebdd3d..0b5da2b 100644 (file)
--- a/lib/libcompiler_rt/arm/unordsf2vfp.S
+++ b/lib/libcompiler_rt/arm/unordsf2vfp.S
@@ -19,10 +19,15 @@
         .syntax unified
         .p2align 2
  DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+       vcmp.f32 s0, s1
+#else
         vmov    s14, r0     // move from GPR 0 to float register
         vmov    s15, r1     // move from GPR 1 to float register
         vcmp.f32 s14, s15
+#endif
         vmrs    apsr_nzcv, fpscr
+       ITE(vs)
         movvs   r0, #1      // set result register to 1 if "overflow" (any NaNs)
         movvc   r0, #0
         bx      lr
diff --git a/lib/libcompiler_rt/arm64/Makefile.mk b/lib/libcompiler_rt/arm64/Makefile.mk

deleted file mode 100644 (file)

index 7f7e386..0000000
--- a/lib/libcompiler_rt/arm64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := arm64 
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/armv6m/Makefile.mk b/lib/libcompiler_rt/armv6m/Makefile.mk

deleted file mode 100644 (file)

index f3c1807..0000000
--- a/lib/libcompiler_rt/armv6m/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv6m
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/ashldi3.c b/lib/libcompiler_rt/ashldi3.c

index eb4698a..fcb0abd 100644 (file)
--- a/lib/libcompiler_rt/ashldi3.c
+++ b/lib/libcompiler_rt/ashldi3.c
@@ -18,8 +18,6 @@
  
  /* Precondition:  0 <= b < bits_in_dword */
  
-ARM_EABI_FNALIAS(llsl, ashldi3)
-
  COMPILER_RT_ABI di_int
  __ashldi3(di_int a, si_int b)
  {
@@ -41,3 +39,10 @@ __ashldi3(di_int a, si_int b)
      }
      return result.all;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) {
+  return __ashldi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ashrdi3.c b/lib/libcompiler_rt/ashrdi3.c

index 14c878b..b4ab4c6 100644 (file)
--- a/lib/libcompiler_rt/ashrdi3.c
+++ b/lib/libcompiler_rt/ashrdi3.c
@@ -18,8 +18,6 @@
  
  /* Precondition:  0 <= b < bits_in_dword */
  
-ARM_EABI_FNALIAS(lasr, ashrdi3)
-
  COMPILER_RT_ABI di_int
  __ashrdi3(di_int a, si_int b)
  {
@@ -42,3 +40,10 @@ __ashrdi3(di_int a, si_int b)
      }
      return result.all;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) {
+  return __ashrdi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/assembly.h b/lib/libcompiler_rt/assembly.h

index 5fc74f6..b15da52 100644 (file)
--- a/lib/libcompiler_rt/assembly.h
+++ b/lib/libcompiler_rt/assembly.h
@@ -44,7 +44,8 @@
  #endif
  #define CONST_SECTION .section .rodata
  
-#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+    defined(__linux__)
  #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
  #else
  #define NO_EXEC_STACK_DIRECTIVE
@@ -70,7 +71,7 @@
  #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
  #define ARM_HAS_BX
  #endif
-#if !defined(__ARM_FEATURE_CLZ) &&                                             \
+#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 &&                \
      (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
  #define __ARM_FEATURE_CLZ
  #endif
@@ -95,9 +96,11 @@
  #if __ARM_ARCH_ISA_THUMB == 2
  #define IT(cond)  it cond
  #define ITT(cond) itt cond
+#define ITE(cond) ite cond
  #else
  #define IT(cond)
  #define ITT(cond)
+#define ITE(cond)
  #endif
  
  #if __ARM_ARCH_ISA_THUMB == 2
@@ -149,6 +152,7 @@
  #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
    .globl SYMBOL_NAME(name) SEPARATOR                                           \
    SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR                       \
    .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
  
  #if defined(__ARM_EABI__)
diff --git a/lib/libcompiler_rt/atomic.c b/lib/libcompiler_rt/atomic.c

index f1ddc3e..ee35e34 100644 (file)
--- a/lib/libcompiler_rt/atomic.c
+++ b/lib/libcompiler_rt/atomic.c
@@ -229,13 +229,20 @@ void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
  // Where the size is known at compile time, the compiler may emit calls to
  // specialised versions of the above functions.
  ////////////////////////////////////////////////////////////////////////////////
+#ifdef __SIZEOF_INT128__
  #define OPTIMISED_CASES\
    OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
    OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
    OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
    OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
-  /* FIXME: __uint128_t isn't available on 32 bit platforms.
-  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define OPTIMISED_CASES\
+  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
  
  #define OPTIMISED_CASE(n, lockfree, type)\
  type __atomic_load_##n(type *src, int model) {\
diff --git a/lib/libcompiler_rt/bswapdi2.c b/lib/libcompiler_rt/bswapdi2.c

new file mode 100644 (file)

index 0000000..eb22000
--- /dev/null
+++ b/lib/libcompiler_rt/bswapdi2.c
@@ -0,0 +1,27 @@
+/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+  return (
+      (((u)&0xff00000000000000ULL) >> 56) |
+      (((u)&0x00ff000000000000ULL) >> 40) |
+      (((u)&0x0000ff0000000000ULL) >> 24) |
+      (((u)&0x000000ff00000000ULL) >> 8)  |
+      (((u)&0x00000000ff000000ULL) << 8)  |
+      (((u)&0x0000000000ff0000ULL) << 24) |
+      (((u)&0x000000000000ff00ULL) << 40) |
+      (((u)&0x00000000000000ffULL) << 56));
+}
diff --git a/lib/libcompiler_rt/bswapsi2.c b/lib/libcompiler_rt/bswapsi2.c

new file mode 100644 (file)

index 0000000..5d941e6
--- /dev/null
+++ b/lib/libcompiler_rt/bswapsi2.c
@@ -0,0 +1,23 @@
+/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+  return (
+      (((u)&0xff000000) >> 24) |
+      (((u)&0x00ff0000) >> 8)  |
+      (((u)&0x0000ff00) << 8)  |
+      (((u)&0x000000ff) << 24));
+}
diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c

index 55bbdd3..25570fc 100644 (file)
--- a/lib/libcompiler_rt/clear_cache.c
+++ b/lib/libcompiler_rt/clear_cache.c
@@ -82,10 +82,6 @@ uintptr_t GetCurrentProcess(void);
    #endif
  #endif
  
-#if defined(__linux__) && defined(__arm__)
-  #include <asm/unistd.h>
-#endif
-
  /*
   * The compiler generates calls to __clear_cache() when creating 
   * trampoline functions on the stack for use with nested functions.
@@ -94,7 +90,7 @@ uintptr_t GetCurrentProcess(void);
   */
  
  void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
  /*
   * Intel processors have a unified instruction and data cache
   * so there is nothing to do
@@ -108,12 +104,23 @@ void __clear_cache(void *start, void *end) {
  
          sysarch(ARM_SYNC_ICACHE, &arg);
      #elif defined(__linux__)
+    /*
+     * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+     * it also brought many other unused defines, as well as a dependency on
+     * kernel headers to be installed.
+     *
+     * This value is stable at least since Linux 3.13 and should remain so for
+     * compatibility reasons, warranting it's re-definition here.
+     */
+    #define __ARM_NR_cacheflush 0x0f0002
           register int start_reg __asm("r0") = (int) (intptr_t) start;
           const register int end_reg __asm("r1") = (int) (intptr_t) end;
+         const register int flags __asm("r2") = 0;
           const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
           __asm __volatile("svc 0x0"
                            : "=r"(start_reg)
-                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg));
+                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
+                            "r"(flags));
           if (start_reg != 0) {
               compilerrt_abort();
           }
diff --git a/lib/libcompiler_rt/comparedf2.c b/lib/libcompiler_rt/comparedf2.c

index 9e29752..c5bb169 100644 (file)
--- a/lib/libcompiler_rt/comparedf2.c
+++ b/lib/libcompiler_rt/comparedf2.c
@@ -113,8 +113,6 @@ __gedf2(fp_t a, fp_t b) {
      }
  }
  
-ARM_EABI_FNALIAS(dcmpun, unorddf2)
-
  COMPILER_RT_ABI int
  __unorddf2(fp_t a, fp_t b) {
      const rep_t aAbs = toRep(a) & absMask;
@@ -144,3 +142,9 @@ __gtdf2(fp_t a, fp_t b) {
      return __gedf2(a, b);
  }
  
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) {
+  return __unorddf2(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/comparesf2.c b/lib/libcompiler_rt/comparesf2.c

index 1fd5063..4badb5e 100644 (file)
--- a/lib/libcompiler_rt/comparesf2.c
+++ b/lib/libcompiler_rt/comparesf2.c
@@ -113,8 +113,6 @@ __gesf2(fp_t a, fp_t b) {
      }
  }
  
-ARM_EABI_FNALIAS(fcmpun, unordsf2)
-
  COMPILER_RT_ABI int
  __unordsf2(fp_t a, fp_t b) {
      const rep_t aAbs = toRep(a) & absMask;
@@ -143,3 +141,10 @@ COMPILER_RT_ABI enum GE_RESULT
  __gtsf2(fp_t a, fp_t b) {
      return __gesf2(a, b);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) {
+  return __unordsf2(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c

index 9a37370..83ea7a4 100644 (file)
--- a/lib/libcompiler_rt/cpu_model.c
+++ b/lib/libcompiler_rt/cpu_model.c
@@ -27,6 +27,10 @@
  #include <intrin.h>
  #endif
  
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
  enum VendorSignatures {
    SIG_INTEL = 0x756e6547 /* Genu */,
    SIG_AMD = 0x68747541 /* Auth */
@@ -40,29 +44,16 @@ enum ProcessorVendors {
  };
  
  enum ProcessorTypes {
-  INTEL_ATOM = 1,
+  INTEL_BONNELL = 1,
    INTEL_CORE2,
    INTEL_COREI7,
    AMDFAM10H,
    AMDFAM15H,
-  INTEL_i386,
-  INTEL_i486,
-  INTEL_PENTIUM,
-  INTEL_PENTIUM_PRO,
-  INTEL_PENTIUM_II,
-  INTEL_PENTIUM_III,
-  INTEL_PENTIUM_IV,
-  INTEL_PENTIUM_M,
-  INTEL_CORE_DUO,
-  INTEL_XEONPHI,
-  INTEL_X86_64,
-  INTEL_NOCONA,
-  INTEL_PRESCOTT,
-  AMD_i486,
-  AMDPENTIUM,
-  AMDATHLON,
-  AMDFAM14H,
-  AMDFAM16H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,
+  AMDFAM17H,
    CPU_TYPE_MAX
  };
  
@@ -75,32 +66,14 @@ enum ProcessorSubtypes {
    AMDFAM10H_ISTANBUL,
    AMDFAM15H_BDVER1,
    AMDFAM15H_BDVER2,
-  INTEL_PENTIUM_MMX,
-  INTEL_CORE2_65,
-  INTEL_CORE2_45,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
    INTEL_COREI7_IVYBRIDGE,
    INTEL_COREI7_HASWELL,
    INTEL_COREI7_BROADWELL,
    INTEL_COREI7_SKYLAKE,
    INTEL_COREI7_SKYLAKE_AVX512,
-  INTEL_ATOM_BONNELL,
-  INTEL_ATOM_SILVERMONT,
-  INTEL_KNIGHTS_LANDING,
-  AMDPENTIUM_K6,
-  AMDPENTIUM_K62,
-  AMDPENTIUM_K63,
-  AMDPENTIUM_GEODE,
-  AMDATHLON_TBIRD,
-  AMDATHLON_MP,
-  AMDATHLON_XP,
-  AMDATHLON_K8SSE3,
-  AMDATHLON_OPTERON,
-  AMDATHLON_FX,
-  AMDATHLON_64,
-  AMD_BTVER1,
-  AMD_BTVER2,
-  AMDFAM15H_BDVER3,
-  AMDFAM15H_BDVER4,
    CPU_SUBTYPE_MAX
  };
  
@@ -116,11 +89,26 @@ enum ProcessorFeatures {
    FEATURE_SSE4_2,
    FEATURE_AVX,
    FEATURE_AVX2,
-  FEATURE_AVX512,
-  FEATURE_AVX512SAVE,
-  FEATURE_MOVBE,
-  FEATURE_ADX,
-  FEATURE_EM64T
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA,
+  FEATURE_AVX5124VNNIW,
+  FEATURE_AVX5124FMAPS,
+  FEATURE_AVX512VPOPCNTDQ
  };
  
  // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -160,26 +148,27 @@ static bool isCpuIdSupported() {
  
  /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
  /// the specified arguments.  If we can't run cpuid on the host, return true.
-static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                                 unsigned *rECX, unsigned *rEDX) {
  #if defined(__GNUC__) || defined(__clang__)
  #if defined(__x86_64__)
-  // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
    __asm__("movq\t%%rbx, %%rsi\n\t"
            "cpuid\n\t"
            "xchgq\t%%rbx, %%rsi\n\t"
            : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
            : "a"(value));
+  return false;
  #elif defined(__i386__)
    __asm__("movl\t%%ebx, %%esi\n\t"
            "cpuid\n\t"
            "xchgl\t%%ebx, %%esi\n\t"
            : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
            : "a"(value));
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
+  return false;
  #else
-  assert(0 && "This method is defined only for x86.");
+  return true;
  #endif
  #elif defined(_MSC_VER)
    // The MSVC intrinsic is portable across x86 and x64.
@@ -189,19 +178,20 @@ static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
    *rEBX = registers[1];
    *rECX = registers[2];
    *rEDX = registers[3];
+  return false;
  #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+  return true;
  #endif
  }
  
  /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
  /// the 4 values in the specified arguments.  If we can't run cpuid on the host,
  /// return true.
-static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                   unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                   unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_X64)
  #if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
    // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
    // FIXME: should we save this for Clang?
    __asm__("movq\t%%rbx, %%rsi\n\t"
@@ -209,42 +199,27 @@ static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
            "xchgq\t%%rbx, %%rsi\n\t"
            : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
            : "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
-  int registers[4];
-  __cpuidex(registers, value, subleaf);
-  *rEAX = registers[0];
-  *rEBX = registers[1];
-  *rECX = registers[2];
-  *rEDX = registers[3];
-#else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
-#endif
-#elif defined(__i386__) || defined(_M_IX86)
-#if defined(__GNUC__) || defined(__clang__)
+  return false;
+#elif defined(__i386__)
    __asm__("movl\t%%ebx, %%esi\n\t"
            "cpuid\n\t"
            "xchgl\t%%ebx, %%esi\n\t"
            : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
            : "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
-  __asm {
-      mov   eax,value
-      mov   ecx,subleaf
-      cpuid
-      mov   esi,rEAX
-      mov   dword ptr [esi],eax
-      mov   esi,rEBX
-      mov   dword ptr [esi],ebx
-      mov   esi,rECX
-      mov   dword ptr [esi],ecx
-      mov   esi,rEDX
-      mov   dword ptr [esi],edx
-  }
+  return false;
  #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+  return true;
  #endif
+#elif defined(_MSC_VER)
+  int registers[4];
+  __cpuidex(registers, value, subleaf);
+  *rEAX = registers[0];
+  *rEBX = registers[1];
+  *rECX = registers[2];
+  *rEDX = registers[3];
+  return false;
  #else
-  assert(0 && "This method is defined only for x86.");
+  return true;
  #endif
  }
  
@@ -279,84 +254,15 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
    }
  }
  
-static void getIntelProcessorTypeAndSubtype(unsigned int Family,
-                                            unsigned int Model,
-                                            unsigned int Brand_id,
-                                            unsigned int Features,
-                                            unsigned *Type, unsigned *Subtype) {
+static void
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                unsigned Brand_id, unsigned Features,
+                                unsigned *Type, unsigned *Subtype) {
    if (Brand_id != 0)
      return;
    switch (Family) {
-  case 3:
-    *Type = INTEL_i386;
-    break;
-  case 4:
-    switch (Model) {
-    case 0: // Intel486 DX processors
-    case 1: // Intel486 DX processors
-    case 2: // Intel486 SX processors
-    case 3: // Intel487 processors, IntelDX2 OverDrive processors,
-            // IntelDX2 processors
-    case 4: // Intel486 SL processor
-    case 5: // IntelSX2 processors
-    case 7: // Write-Back Enhanced IntelDX2 processors
-    case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
-    default:
-      *Type = INTEL_i486;
-      break;
-    }
-  case 5:
-    switch (Model) {
-    case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
-            // Pentium processors (60, 66)
-    case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
-            // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
-            // 150, 166, 200)
-    case 3: // Pentium OverDrive processors for Intel486 processor-based
-            // systems
-      *Type = INTEL_PENTIUM;
-      break;
-    case 4: // Pentium OverDrive processor with MMX technology for Pentium
-            // processor (75, 90, 100, 120, 133), Pentium processor with
-            // MMX technology (166, 200)
-      *Type = INTEL_PENTIUM;
-      *Subtype = INTEL_PENTIUM_MMX;
-      break;
-    default:
-      *Type = INTEL_PENTIUM;
-      break;
-    }
    case 6:
      switch (Model) {
-    case 0x01: // Pentium Pro processor
-      *Type = INTEL_PENTIUM_PRO;
-      break;
-    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
-               // model 03
-    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
-               // model 05, and Intel Celeron processor, model 05
-    case 0x06: // Celeron processor, model 06
-      *Type = INTEL_PENTIUM_II;
-      break;
-    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
-               // processor, model 07
-    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
-               // model 08, and Celeron processor, model 08
-    case 0x0a: // Pentium III Xeon processor, model 0Ah
-    case 0x0b: // Pentium III processor, model 0Bh
-      *Type = INTEL_PENTIUM_III;
-      break;
-    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
-    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
-               // 0Dh. All processors are manufactured using the 90 nm process.
-    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
-               // Integrated Processor with Intel QuickAssist Technology
-      *Type = INTEL_PENTIUM_M;
-      break;
-    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
-               // 0Eh. All processors are manufactured using the 65 nm process.
-      *Type = INTEL_CORE_DUO;
-      break;   // yonah
      case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
                 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
                 // mobile processor, Intel Core 2 Extreme processor, Intel
@@ -364,9 +270,6 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
                 // 0Fh. All processors are manufactured using the 65 nm process.
      case 0x16: // Intel Celeron processor model 16h. All processors are
                 // manufactured using the 65 nm process
-      *Type = INTEL_CORE2; // "core2"
-      *Subtype = INTEL_CORE2_65;
-      break;
      case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
                 // 17h. All processors are manufactured using the 45 nm process.
                 //
@@ -374,14 +277,13 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
      case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
                 // the 45 nm process.
        *Type = INTEL_CORE2; // "penryn"
-      *Subtype = INTEL_CORE2_45;
        break;
      case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
                 // processors are manufactured using the 45 nm process.
      case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
                 // As found in a Summer 2010 model iMac.
      case 0x1f:
-    case 0x2e:              // Nehalem EX
+    case 0x2e:             // Nehalem EX
        *Type = INTEL_COREI7; // "nehalem"
        *Subtype = INTEL_COREI7_NEHALEM;
        break;
@@ -399,7 +301,7 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
        *Subtype = INTEL_COREI7_SANDYBRIDGE;
        break;
      case 0x3a:
-    case 0x3e:              // Ivy Bridge EP
+    case 0x3e:             // Ivy Bridge EP
        *Type = INTEL_COREI7; // "ivybridge"
        *Subtype = INTEL_COREI7_IVYBRIDGE;
        break;
@@ -423,22 +325,26 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
        break;
  
      // Skylake:
-    case 0x4e:
-      *Type = INTEL_COREI7; // "skylake-avx512"
-      *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
-      break;
-    case 0x5e:
+    case 0x4e: // Skylake mobile
+    case 0x5e: // Skylake desktop
+    case 0x8e: // Kaby Lake mobile
+    case 0x9e: // Kaby Lake desktop
        *Type = INTEL_COREI7; // "skylake"
        *Subtype = INTEL_COREI7_SKYLAKE;
        break;
  
+    // Skylake Xeon:
+    case 0x55:
+      *Type = INTEL_COREI7;
+      *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      break;
+
      case 0x1c: // Most 45 nm Intel Atom processors
      case 0x26: // 45 nm Atom Lincroft
      case 0x27: // 32 nm Atom Medfield
      case 0x35: // 32 nm Atom Midview
      case 0x36: // 32 nm Atom Midview
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_BONNELL;
+      *Type = INTEL_BONNELL;
        break; // "bonnell"
  
      // Atom Silvermont codes from the Intel software optimization guide.
@@ -448,185 +354,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
      case 0x5a:
      case 0x5d:
      case 0x4c: // really airmont
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_SILVERMONT;
+      *Type = INTEL_SILVERMONT;
        break; // "silvermont"
  
      case 0x57:
-      *Type = INTEL_XEONPHI; // knl
-      *Subtype = INTEL_KNIGHTS_LANDING;
+      *Type = INTEL_KNL; // knl
        break;
  
-    default: // Unknown family 6 CPU, try to guess.
-      if (Features & (1 << FEATURE_AVX512)) {
-        *Type = INTEL_XEONPHI; // knl
-        *Subtype = INTEL_KNIGHTS_LANDING;
-        break;
-      }
-      if (Features & (1 << FEATURE_ADX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_BROADWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX2)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_HASWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_SANDYBRIDGE;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_2)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_SILVERMONT;
-        } else {
-          *Type = INTEL_COREI7;
-          *Subtype = INTEL_COREI7_NEHALEM;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_1)) {
-        *Type = INTEL_CORE2; // "penryn"
-        *Subtype = INTEL_CORE2_45;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSSE3)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
-        } else {
-          *Type = INTEL_CORE2; // "core2"
-          *Subtype = INTEL_CORE2_65;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_EM64T)) {
-        *Type = INTEL_X86_64;
-        break; // x86-64
-      }
-      if (Features & (1 << FEATURE_SSE2)) {
-        *Type = INTEL_PENTIUM_M;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE)) {
-        *Type = INTEL_PENTIUM_III;
-        break;
-      }
-      if (Features & (1 << FEATURE_MMX)) {
-        *Type = INTEL_PENTIUM_II;
-        break;
-      }
-      *Type = INTEL_PENTIUM_PRO;
-      break;
-    }
-  case 15: {
-    switch (Model) {
-    case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
-            // model 00h and manufactured using the 0.18 micron process.
-    case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
-            // processor MP, and Intel Celeron processor. All processors are
-            // model 01h and manufactured using the 0.18 micron process.
-    case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
-            // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
-            // processor, and Mobile Intel Celeron processor. All processors
-            // are model 02h and manufactured using the 0.13 micron process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
-      break;
-
-    case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
-            // processor. All processors are model 03h and manufactured using
-            // the 90 nm process.
-    case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
-            // Pentium D processor, Intel Xeon processor, Intel Xeon
-            // processor MP, Intel Celeron D processor. All processors are
-            // model 04h and manufactured using the 90 nm process.
-    case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
-            // Extreme Edition, Intel Xeon processor, Intel Xeon processor
-            // MP, Intel Celeron D processor. All processors are model 06h
-            // and manufactured using the 65 nm process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
-      break;
-
-    default:
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+    default: // Unknown family 6 CPU.
        break;
+    break;
      }
-  }
    default:
-    break; /*"generic"*/
+    break; // Unknown.
    }
  }
  
-static void getAMDProcessorTypeAndSubtype(unsigned int Family,
-                                          unsigned int Model,
-                                          unsigned int Features, unsigned *Type,
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                          unsigned Features, unsigned *Type,
                                            unsigned *Subtype) {
    // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
    // appears to be no way to generate the wide variety of AMD-specific targets
    // from the information returned from CPUID.
    switch (Family) {
-  case 4:
-    *Type = AMD_i486;
-  case 5:
-    *Type = AMDPENTIUM;
-    switch (Model) {
-    case 6:
-    case 7:
-      *Subtype = AMDPENTIUM_K6;
-      break; // "k6"
-    case 8:
-      *Subtype = AMDPENTIUM_K62;
-      break; // "k6-2"
-    case 9:
-    case 13:
-      *Subtype = AMDPENTIUM_K63;
-      break; // "k6-3"
-    case 10:
-      *Subtype = AMDPENTIUM_GEODE;
-      break; // "geode"
-    default:
-      break;
-    }
-  case 6:
-    *Type = AMDATHLON;
-    switch (Model) {
-    case 4:
-      *Subtype = AMDATHLON_TBIRD;
-      break; // "athlon-tbird"
-    case 6:
-    case 7:
-    case 8:
-      *Subtype = AMDATHLON_MP;
-      break; // "athlon-mp"
-    case 10:
-      *Subtype = AMDATHLON_XP;
-      break; // "athlon-xp"
-    default:
-      break;
-    }
-  case 15:
-    *Type = AMDATHLON;
-    if (Features & (1 << FEATURE_SSE3)) {
-      *Subtype = AMDATHLON_K8SSE3;
-      break; // "k8-sse3"
-    }
-    switch (Model) {
-    case 1:
-      *Subtype = AMDATHLON_OPTERON;
-      break; // "opteron"
-    case 5:
-      *Subtype = AMDATHLON_FX;
-      break; // "athlon-fx"; also opteron
-    default:
-      *Subtype = AMDATHLON_64;
-      break; // "athlon64"
-    }
    case 16:
      *Type = AMDFAM10H; // "amdfam10"
      switch (Model) {
@@ -639,23 +389,16 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
      case 8:
        *Subtype = AMDFAM10H_ISTANBUL;
        break;
-    default:
-      break;
      }
+    break;
    case 20:
-    *Type = AMDFAM14H;
-    *Subtype = AMD_BTVER1;
+    *Type = AMD_BTVER1;
      break; // "btver1";
    case 21:
      *Type = AMDFAM15H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1"
-    }
-    if (Model >= 0x50 && Model <= 0x6f) {
+    if (Model >= 0x60 && Model <= 0x7f) {
        *Subtype = AMDFAM15H_BDVER4;
-      break; // "bdver4"; 50h-6Fh: Excavator
+      break; // "bdver4"; 60h-7Fh: Excavator
      }
      if (Model >= 0x30 && Model <= 0x3f) {
        *Subtype = AMDFAM15H_BDVER3;
@@ -671,31 +414,47 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
      }
      break;
    case 22:
-    *Type = AMDFAM16H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1";
-    }
-    *Subtype = AMD_BTVER2;
+    *Type = AMD_BTVER2;
      break; // "btver2"
+  case 23:
+    *Type = AMDFAM17H;
+    *Subtype = AMDFAM17H_ZNVER1;
+    break;
    default:
      break; // "generic"
    }
  }
  
-static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
-                                     unsigned MaxLeaf) {
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+                                 unsigned *FeaturesOut) {
    unsigned Features = 0;
-  unsigned int EAX, EBX;
-  Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
-  Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
-  Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
-  Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
-  Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
-  Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
-  Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
-  Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+  unsigned EAX, EBX;
+
+  if ((EDX >> 15) & 1)
+    Features |= 1 << FEATURE_CMOV;
+  if ((EDX >> 23) & 1)
+    Features |= 1 << FEATURE_MMX;
+  if ((EDX >> 25) & 1)
+    Features |= 1 << FEATURE_SSE;
+  if ((EDX >> 26) & 1)
+    Features |= 1 << FEATURE_SSE2;
+
+  if ((ECX >> 0) & 1)
+    Features |= 1 << FEATURE_SSE3;
+  if ((ECX >> 1) & 1)
+    Features |= 1 << FEATURE_PCLMUL;
+  if ((ECX >> 9) & 1)
+    Features |= 1 << FEATURE_SSSE3;
+  if ((ECX >> 12) & 1)
+    Features |= 1 << FEATURE_FMA;
+  if ((ECX >> 19) & 1)
+    Features |= 1 << FEATURE_SSE4_1;
+  if ((ECX >> 20) & 1)
+    Features |= 1 << FEATURE_SSE4_2;
+  if ((ECX >> 23) & 1)
+    Features |= 1 << FEATURE_POPCNT;
+  if ((ECX >> 25) & 1)
+    Features |= 1 << FEATURE_AES;
  
    // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
    // indicates that the AVX registers will be saved and restored on context
@@ -704,30 +463,72 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
    bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                  ((EAX & 0x6) == 0x6);
    bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
-  bool HasLeaf7 = MaxLeaf >= 0x7;
-  getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
-  bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
-  bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
-  bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
-  Features |= (HasAVX << FEATURE_AVX);
-  Features |= (HasAVX2 << FEATURE_AVX2);
-  Features |= (HasAVX512 << FEATURE_AVX512);
-  Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
-  Features |= (HasADX << FEATURE_ADX);
-
-  getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-  Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
-  return Features;
+
+  if (HasAVX)
+    Features |= 1 << FEATURE_AVX;
+
+  bool HasLeaf7 =
+      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+  if (HasLeaf7 && ((EBX >> 3) & 1))
+    Features |= 1 << FEATURE_BMI;
+  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+    Features |= 1 << FEATURE_AVX2;
+  if (HasLeaf7 && ((EBX >> 9) & 1))
+    Features |= 1 << FEATURE_BMI2;
+  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512F;
+  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512DQ;
+  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512IFMA;
+  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512PF;
+  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512ER;
+  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512CD;
+  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512BW;
+  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VL;
+
+  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VBMI;
+  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
+
+  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124VNNIW;
+  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124FMAPS;
+
+  unsigned MaxExtLevel;
+  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  if (HasExtLeaf1 && ((ECX >> 6) & 1))
+    Features |= 1 << FEATURE_SSE4_A;
+  if (HasExtLeaf1 && ((ECX >> 11) & 1))
+    Features |= 1 << FEATURE_XOP;
+  if (HasExtLeaf1 && ((ECX >> 16) & 1))
+    Features |= 1 << FEATURE_FMA4;
+
+  *FeaturesOut = Features;
  }
  
-#ifdef HAVE_INIT_PRIORITY
-#define CONSTRUCTOR_PRIORITY (101)
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
  #else
-#define CONSTRUCTOR_PRIORITY
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
  #endif
  
-int __cpu_indicator_init(void)
-    __attribute__((constructor CONSTRUCTOR_PRIORITY));
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
  
  struct __processor_model {
    unsigned int __cpu_vendor;
@@ -742,13 +543,13 @@ struct __processor_model {
     the priority set.  However, it still runs after ifunc initializers and
     needs to be called explicitly there.  */
  
-int __attribute__((constructor CONSTRUCTOR_PRIORITY))
+int CONSTRUCTOR_ATTRIBUTE
  __cpu_indicator_init(void) {
-  unsigned int EAX, EBX, ECX, EDX;
-  unsigned int MaxLeaf = 5;
-  unsigned int Vendor;
-  unsigned int Model, Family, Brand_id;
-  unsigned int Features = 0;
+  unsigned EAX, EBX, ECX, EDX;
+  unsigned MaxLeaf = 5;
+  unsigned Vendor;
+  unsigned Model, Family, Brand_id;
+  unsigned Features = 0;
  
    /* This function needs to run just once.  */
    if (__cpu_model.__cpu_vendor)
@@ -758,9 +559,7 @@ __cpu_indicator_init(void) {
      return -1;
  
    /* Assume cpuid insn present. Run in level 0 to get vendor id. */
-  getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
-
-  if (MaxLeaf < 1) {
+  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
      __cpu_model.__cpu_vendor = VENDOR_OTHER;
      return -1;
    }
@@ -769,7 +568,7 @@ __cpu_indicator_init(void) {
    Brand_id = EBX & 0xff;
  
    /* Find available features. */
-  Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
    __cpu_model.__cpu_features[0] = Features;
  
    if (Vendor == SIG_INTEL) {
diff --git a/lib/libcompiler_rt/divdf3.c b/lib/libcompiler_rt/divdf3.c

index ab44c2b..492e32b 100644 (file)
--- a/lib/libcompiler_rt/divdf3.c
+++ b/lib/libcompiler_rt/divdf3.c
@@ -19,8 +19,6 @@
  #define DOUBLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(ddiv, divdf3)
-
  COMPILER_RT_ABI fp_t
  __divdf3(fp_t a, fp_t b) {
      
@@ -183,3 +181,10 @@ __divdf3(fp_t a, fp_t b) {
          return result;
      }
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) {
+  return __divdf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divsf3.c b/lib/libcompiler_rt/divsf3.c

index de2e376..aa6289a 100644 (file)
--- a/lib/libcompiler_rt/divsf3.c
+++ b/lib/libcompiler_rt/divsf3.c
@@ -19,8 +19,6 @@
  #define SINGLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(fdiv, divsf3)
-
  COMPILER_RT_ABI fp_t
  __divsf3(fp_t a, fp_t b) {
      
@@ -167,3 +165,10 @@ __divsf3(fp_t a, fp_t b) {
          return fromRep(absResult | quotientSign);
      }
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) {
+  return __divsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divsi3.c b/lib/libcompiler_rt/divsi3.c

index bab4aef..3852e39 100644 (file)
--- a/lib/libcompiler_rt/divsi3.c
+++ b/lib/libcompiler_rt/divsi3.c
@@ -16,8 +16,6 @@
  
  /* Returns: a / b */
  
-ARM_EABI_FNALIAS(idiv, divsi3)
-
  COMPILER_RT_ABI si_int
  __divsi3(si_int a, si_int b)
  {
@@ -35,3 +33,10 @@ __divsi3(si_int a, si_int b)
       */
      return ((su_int)a/(su_int)b ^ s_a) - s_a;    /* negate if s_a == -1 */
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) {
+  return __divsi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divtc3.c b/lib/libcompiler_rt/divtc3.c

index 04693df..16e538b 100644 (file)
--- a/lib/libcompiler_rt/divtc3.c
+++ b/lib/libcompiler_rt/divtc3.c
@@ -17,7 +17,7 @@
  
  /* Returns: the quotient of (a + ib) / (c + id) */
  
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
  __divtc3(long double __a, long double __b, long double __c, long double __d)
  {
      int __ilogbw = 0;
@@ -29,31 +29,31 @@ __divtc3(long double __a, long double __b, long double __c, long double __d)
          __d = crt_scalbnl(__d, -__ilogbw);
      }
      long double __denom = __c * __c + __d * __d;
-    long double _Complex z;
-    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
      {
          if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
          {
-            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
          }
          else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                   crt_isfinite(__c) && crt_isfinite(__d))
          {
              __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
              __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
          }
          else if (crt_isinf(__logbw) && __logbw > 0.0 &&
                   crt_isfinite(__a) && crt_isfinite(__b))
          {
              __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
              __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
-            __real__ z = 0.0 * (__a * __c + __b * __d);
-            __imag__ z = 0.0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
          }
      }
      return z;
diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c

index eccbf53..12aad3a 100644 (file)
--- a/lib/libcompiler_rt/emutls.c
+++ b/lib/libcompiler_rt/emutls.c
@@ -7,7 +7,6 @@
   *
   * ===----------------------------------------------------------------------===
   */
-#include <pthread.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
@@ -15,6 +14,23 @@
  #include "int_lib.h"
  #include "int_util.h"
  
+typedef struct emutls_address_array {
+    uintptr_t size;  /* number of elements in the 'data' array */
+    void* data[];
+} emutls_address_array;
+
+static void emutls_shutdown(emutls_address_array *array);
+
+#ifndef _WIN32
+
+#include <pthread.h>
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t emutls_pthread_key;
+
+typedef unsigned int gcc_word __attribute__((mode(word)));
+typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+
  /* Default is not to use posix_memalign, so systems like Android
   * can use thread local data without heavier POSIX memory allocators.
   */
@@ -22,26 +38,6 @@
  #define EMUTLS_USE_POSIX_MEMALIGN 0
  #endif
  
-/* For every TLS variable xyz,
- * there is one __emutls_control variable named __emutls_v.xyz.
- * If xyz has non-zero initial value, __emutls_v.xyz's "value"
- * will point to __emutls_t.xyz, which has the initial value.
- */
-typedef unsigned int gcc_word __attribute__((mode(word)));
-typedef struct __emutls_control {
-    /* Must use gcc_word here, instead of size_t, to match GCC.  When
-       gcc_word is larger than size_t, the upper extra bits are all
-       zeros.  We can use variables of size_t to operate on size and
-       align.  */
-    gcc_word size;  /* size of the object in bytes */
-    gcc_word align;  /* alignment of the object in bytes */
-    union {
-        uintptr_t index;  /* data[index-1] is the object address */
-        void* address;  /* object address, when in single thread env */
-    } object;
-    void* value;  /* null or non-zero initial value for the object */
-} __emutls_control;
-
  static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
      void *base;
  #if EMUTLS_USE_POSIX_MEMALIGN
@@ -50,7 +46,7 @@ static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  #else
      #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
      char* object;
-    if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+    if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
          abort();
      base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
                      & ~(uintptr_t)(align - 1));
@@ -69,10 +65,207 @@ static __inline void emutls_memalign_free(void *base) {
  #endif
  }
  
+static void emutls_key_destructor(void* ptr) {
+    emutls_shutdown((emutls_address_array*)ptr);
+    free(ptr);
+}
+
+static __inline void emutls_init(void) {
+    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+        abort();
+}
+
+static __inline void emutls_init_once(void) {
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once(&once, emutls_init);
+}
+
+static __inline void emutls_lock() {
+    pthread_mutex_lock(&emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    pthread_mutex_unlock(&emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
+#else
+
+#include <windows.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <assert.h>
+#include <immintrin.h>
+
+static LPCRITICAL_SECTION emutls_mutex;
+static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
+
+typedef uintptr_t gcc_word;
+typedef void * gcc_pointer;
+
+static void win_error(DWORD last_err, const char *hint) {
+    char *buffer = NULL;
+    if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                       NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
+        fprintf(stderr, "Windows error: %s\n", buffer);
+    } else {
+        fprintf(stderr, "Unkown Windows error: %s\n", hint);
+    }
+    LocalFree(buffer);
+}
+
+static __inline void win_abort(DWORD last_err, const char *hint) {
+    win_error(last_err, hint);
+    abort();
+}
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+    void *base = _aligned_malloc(size, align);
+    if (!base)
+        win_abort(GetLastError(), "_aligned_malloc");
+    return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+    _aligned_free(base);
+}
+
+static void emutls_exit(void) {
+    if (emutls_mutex) {
+        DeleteCriticalSection(emutls_mutex);
+        _aligned_free(emutls_mutex);
+        emutls_mutex = NULL;
+    }
+    if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
+        emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index));
+        TlsFree(emutls_tls_index);
+        emutls_tls_index = TLS_OUT_OF_INDEXES;
+    }
+}
+
+#pragma warning (push)
+#pragma warning (disable : 4100)
+static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
+    emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
+    if (!emutls_mutex) {
+        win_error(GetLastError(), "_aligned_malloc");
+        return FALSE;
+    }
+    InitializeCriticalSection(emutls_mutex);
+
+    emutls_tls_index = TlsAlloc();
+    if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
+        emutls_exit();
+        win_error(GetLastError(), "TlsAlloc");
+        return FALSE;
+    }
+    atexit(&emutls_exit);
+    return TRUE;
+}
+
+static __inline void emutls_init_once(void) {
+    static INIT_ONCE once;
+    InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
+}
+
+static __inline void emutls_lock() {
+    EnterCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    LeaveCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0)
+        win_abort(GetLastError(), "TlsSetValue");
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    LPVOID value = TlsGetValue(emutls_tls_index);
+    if (value == NULL) {
+        const DWORD err = GetLastError();
+        if (err != ERROR_SUCCESS)
+            win_abort(err, "TlsGetValue");
+    }
+    return (emutls_address_array*) value;
+}
+
+/* Provide atomic load/store functions for emutls_get_index if built with MSVC.
+ */
+#if !defined(__ATOMIC_RELEASE)
+
+enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
+
+static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
+    assert(type == __ATOMIC_ACQUIRE);
+#ifdef _WIN64
+    return (uintptr_t) _load_be_u64(ptr);
+#else
+    return (uintptr_t) _load_be_u32(ptr);
+#endif
+}
+
+static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
+    assert(type == __ATOMIC_RELEASE);
+#ifdef _WIN64
+    _store_be_u64(ptr, val);
+#else
+    _store_be_u32(ptr, val);
+#endif
+}
+
+#endif
+
+#pragma warning (pop)
+
+#endif
+
+static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
+
+/* Free the allocated TLS data
+ */
+static void emutls_shutdown(emutls_address_array *array) {
+    if (array) {
+        uintptr_t i;
+        for (i = 0; i < array->size; ++i) {
+            if (array->data[i])
+                emutls_memalign_free(array->data[i]);
+        }
+    }
+}
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+    /* Must use gcc_word here, instead of size_t, to match GCC.  When
+       gcc_word is larger than size_t, the upper extra bits are all
+       zeros.  We can use variables of size_t to operate on size and
+       align.  */
+    gcc_word size;  /* size of the object in bytes */
+    gcc_word align;  /* alignment of the object in bytes */
+    union {
+        uintptr_t index;  /* data[index-1] is the object address */
+        void* address;  /* object address, when in single thread env */
+    } object;
+    void* value;  /* null or non-zero initial value for the object */
+} __emutls_control;
+
  /* Emulated TLS objects are always allocated at run-time. */
  static __inline void *emutls_allocate_object(__emutls_control *control) {
      /* Use standard C types, check with gcc's emutls.o. */
-    typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
      COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
      COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
  
@@ -93,45 +286,19 @@ static __inline void *emutls_allocate_object(__emutls_control *control) {
      return base;
  }
  
-static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
-
-typedef struct emutls_address_array {
-    uintptr_t size;  /* number of elements in the 'data' array */
-    void* data[];
-} emutls_address_array;
-
-static pthread_key_t emutls_pthread_key;
-
-static void emutls_key_destructor(void* ptr) {
-    emutls_address_array* array = (emutls_address_array*)ptr;
-    uintptr_t i;
-    for (i = 0; i < array->size; ++i) {
-        if (array->data[i])
-            emutls_memalign_free(array->data[i]);
-    }
-    free(ptr);
-}
-
-static void emutls_init(void) {
-    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
-        abort();
-}
  
  /* Returns control->object.index; set index if not allocated yet. */
  static __inline uintptr_t emutls_get_index(__emutls_control *control) {
      uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
      if (!index) {
-        static pthread_once_t once = PTHREAD_ONCE_INIT;
-        pthread_once(&once, emutls_init);
-        pthread_mutex_lock(&emutls_mutex);
+        emutls_init_once();
+        emutls_lock();
          index = control->object.index;
          if (!index) {
              index = ++emutls_num_object;
              __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
          }
-        pthread_mutex_unlock(&emutls_mutex);
+        emutls_unlock();
      }
      return index;
  }
@@ -142,7 +309,7 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
      if (array == NULL)
          abort();
      array->size = size;
-    pthread_setspecific(emutls_pthread_key, (void*)array);
+    emutls_setspecific(array);
  }
  
  /* Returns the new 'data' array size, number of elements,
@@ -156,22 +323,29 @@ static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
      return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
  }
  
+/* Returns the size in bytes required for an emutls_address_array with
+ * N number of elements for data field.
+ */
+static __inline uintptr_t emutls_asize(uintptr_t N) {
+    return N * sizeof(void *) + sizeof(emutls_address_array);
+}
+
  /* Returns the thread local emutls_address_array.
   * Extends its size if necessary to hold address at index.
   */
  static __inline emutls_address_array *
  emutls_get_address_array(uintptr_t index) {
-    emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+    emutls_address_array* array = emutls_getspecific();
      if (array == NULL) {
          uintptr_t new_size = emutls_new_data_array_size(index);
-        array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) malloc(emutls_asize(new_size));
          if (array)
              memset(array->data, 0, new_size * sizeof(void*));
          emutls_check_array_set_size(array, new_size);
      } else if (index > array->size) {
          uintptr_t orig_size = array->size;
          uintptr_t new_size = emutls_new_data_array_size(index);
-        array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) realloc(array, emutls_asize(new_size));
          if (array)
              memset(array->data + orig_size, 0,
                     (new_size - orig_size) * sizeof(void*));
@@ -182,8 +356,8 @@ emutls_get_address_array(uintptr_t index) {
  
  void* __emutls_get_address(__emutls_control* control) {
      uintptr_t index = emutls_get_index(control);
-    emutls_address_array* array = emutls_get_address_array(index);
-    if (array->data[index - 1] == NULL)
-        array->data[index - 1] = emutls_allocate_object(control);
-    return array->data[index - 1];
+    emutls_address_array* array = emutls_get_address_array(index--);
+    if (array->data[index] == NULL)
+        array->data[index] = emutls_allocate_object(control);
+    return array->data[index];
  }
diff --git a/lib/libcompiler_rt/extendhfsf2.c b/lib/libcompiler_rt/extendhfsf2.c

index 27115a4..e7d9fde 100644 (file)
--- a/lib/libcompiler_rt/extendhfsf2.c
+++ b/lib/libcompiler_rt/extendhfsf2.c
@@ -12,8 +12,6 @@
  #define DST_SINGLE
  #include "fp_extend_impl.inc"
  
-ARM_EABI_FNALIAS(h2f, extendhfsf2)
-
  // Use a forwarding definition and noinline to implement a poor man's alias,
  // as there isn't a good cross-platform way of defining one.
  COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
@@ -23,3 +21,10 @@ COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
  COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
      return __extendhfsf2(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_h2f(uint16_t a) {
+  return __extendhfsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/extendsfdf2.c b/lib/libcompiler_rt/extendsfdf2.c

index 7a267c2..b9e7a74 100644 (file)
--- a/lib/libcompiler_rt/extendsfdf2.c
+++ b/lib/libcompiler_rt/extendsfdf2.c
@@ -12,8 +12,13 @@
  #define DST_DOUBLE
  #include "fp_extend_impl.inc"
  
-ARM_EABI_FNALIAS(f2d, extendsfdf2)
-
  COMPILER_RT_ABI double __extendsfdf2(float a) {
      return __extendXfYf2__(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_f2d(float a) {
+  return __extendsfdf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ffssi2.c b/lib/libcompiler_rt/ffssi2.c

new file mode 100644 (file)

index 0000000..e5180ef
--- /dev/null
+++ b/lib/libcompiler_rt/ffssi2.c
@@ -0,0 +1,29 @@
+/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffssi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffssi2(si_int a)
+{
+    if (a == 0)
+    {
+        return 0;
+    }
+    return __builtin_ctz(a) + 1;
+}
diff --git a/lib/libcompiler_rt/fixdfdi.c b/lib/libcompiler_rt/fixdfdi.c

index 14283ef..31d76df 100644 (file)
--- a/lib/libcompiler_rt/fixdfdi.c
+++ b/lib/libcompiler_rt/fixdfdi.c
@@ -10,7 +10,6 @@
  
  #define DOUBLE_PRECISION
  #include "fp_lib.h"
-ARM_EABI_FNALIAS(d2lz, fixdfdi)
  
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; can set the invalid
@@ -44,3 +43,15 @@ __fixdfdi(fp_t a) {
  }
  
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_d2lz(fp_t a) {
+#else
+__aeabi_d2lz(double a) {
+#endif
+  return __fixdfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixdfsi.c b/lib/libcompiler_rt/fixdfsi.c

index 704e65b..fc316dc 100644 (file)
--- a/lib/libcompiler_rt/fixdfsi.c
+++ b/lib/libcompiler_rt/fixdfsi.c
@@ -14,9 +14,14 @@ typedef si_int fixint_t;
  typedef su_int fixuint_t;
  #include "fp_fixint_impl.inc"
  
-ARM_EABI_FNALIAS(d2iz, fixdfsi)
-
  COMPILER_RT_ABI si_int
  __fixdfsi(fp_t a) {
      return __fixint(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
+  return __fixdfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixsfdi.c b/lib/libcompiler_rt/fixsfdi.c

index fab47e2..c434736 100644 (file)
--- a/lib/libcompiler_rt/fixsfdi.c
+++ b/lib/libcompiler_rt/fixsfdi.c
@@ -11,8 +11,6 @@
  #define SINGLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(f2lz, fixsfdi)
-
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; can set the invalid
   * flag as a side-effect of computation.
@@ -45,3 +43,15 @@ __fixsfdi(fp_t a) {
  }
  
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_f2lz(fp_t a) {
+#else
+__aeabi_f2lz(float a) {
+#endif
+  return __fixsfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixsfsi.c b/lib/libcompiler_rt/fixsfsi.c

index f045536..3276df9 100644 (file)
--- a/lib/libcompiler_rt/fixsfsi.c
+++ b/lib/libcompiler_rt/fixsfsi.c
@@ -14,9 +14,14 @@ typedef si_int fixint_t;
  typedef su_int fixuint_t;
  #include "fp_fixint_impl.inc"
  
-ARM_EABI_FNALIAS(f2iz, fixsfsi)
-
  COMPILER_RT_ABI si_int
  __fixsfsi(fp_t a) {
      return __fixint(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
+  return __fixsfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunsdfdi.c b/lib/libcompiler_rt/fixunsdfdi.c

index 4b0bc9e..b734409 100644 (file)
--- a/lib/libcompiler_rt/fixunsdfdi.c
+++ b/lib/libcompiler_rt/fixunsdfdi.c
@@ -11,8 +11,6 @@
  #define DOUBLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
-
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; can set the invalid
   * flag as a side-effect of computation.
@@ -42,3 +40,15 @@ __fixunsdfdi(fp_t a) {
  }
  
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_d2ulz(fp_t a) {
+#else
+__aeabi_d2ulz(double a) {
+#endif
+  return __fixunsdfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunsdfsi.c b/lib/libcompiler_rt/fixunsdfsi.c

index 232d342..bb3d8e0 100644 (file)
--- a/lib/libcompiler_rt/fixunsdfsi.c
+++ b/lib/libcompiler_rt/fixunsdfsi.c
@@ -13,9 +13,14 @@
  typedef su_int fixuint_t;
  #include "fp_fixuint_impl.inc"
  
-ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
-
  COMPILER_RT_ABI su_int
  __fixunsdfsi(fp_t a) {
      return __fixuint(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) {
+  return __fixunsdfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunssfdi.c b/lib/libcompiler_rt/fixunssfdi.c

index f8ebab8..5d92245 100644 (file)
--- a/lib/libcompiler_rt/fixunssfdi.c
+++ b/lib/libcompiler_rt/fixunssfdi.c
@@ -11,8 +11,6 @@
  #define SINGLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
-
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; can set the invalid
   * flag as a side-effect of computation.
@@ -43,3 +41,15 @@ __fixunssfdi(fp_t a) {
  }
  
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_f2ulz(fp_t a) {
+#else
+__aeabi_f2ulz(float a) {
+#endif
+  return __fixunssfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunssfsi.c b/lib/libcompiler_rt/fixunssfsi.c

index cc2b05b..91d5e8a 100644 (file)
--- a/lib/libcompiler_rt/fixunssfsi.c
+++ b/lib/libcompiler_rt/fixunssfsi.c
@@ -17,9 +17,14 @@
  typedef su_int fixuint_t;
  #include "fp_fixuint_impl.inc"
  
-ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
-
  COMPILER_RT_ABI su_int
  __fixunssfsi(fp_t a) {
      return __fixuint(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) {
+  return __fixunssfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatdidf.c b/lib/libcompiler_rt/floatdidf.c

index 2b023ad..681fece 100644 (file)
--- a/lib/libcompiler_rt/floatdidf.c
+++ b/lib/libcompiler_rt/floatdidf.c
@@ -22,8 +22,6 @@
  
  /* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
  
-ARM_EABI_FNALIAS(l2d, floatdidf)
-
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; we'll set the inexact flag
   * as a side-effect of this computation.
@@ -105,3 +103,10 @@ __floatdidf(di_int a)
      return fb.f;
  }
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_l2d(di_int a) {
+  return __floatdidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatdisf.c b/lib/libcompiler_rt/floatdisf.c

index 3e47580..dd54816 100644 (file)
--- a/lib/libcompiler_rt/floatdisf.c
+++ b/lib/libcompiler_rt/floatdisf.c
@@ -22,8 +22,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(l2f, floatdisf)
-
  COMPILER_RT_ABI float
  __floatdisf(di_int a)
  {
@@ -78,3 +76,10 @@ __floatdisf(di_int a)
             ((su_int)a & 0x007FFFFF);   /* mantissa */
      return fb.f;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_l2f(di_int a) {
+  return __floatdisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatsidf.c b/lib/libcompiler_rt/floatsidf.c

index 1cf99b7..2ae395b 100644 (file)
--- a/lib/libcompiler_rt/floatsidf.c
+++ b/lib/libcompiler_rt/floatsidf.c
@@ -18,8 +18,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(i2d, floatsidf)
-
  COMPILER_RT_ABI fp_t
  __floatsidf(int a) {
      
@@ -51,3 +49,10 @@ __floatsidf(int a) {
      // Insert the sign bit and return
      return fromRep(result | sign);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2d(int a) {
+  return __floatsidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatsisf.c b/lib/libcompiler_rt/floatsisf.c

index 467dd1d..08891fc 100644 (file)
--- a/lib/libcompiler_rt/floatsisf.c
+++ b/lib/libcompiler_rt/floatsisf.c
@@ -18,8 +18,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(i2f, floatsisf)
-
  COMPILER_RT_ABI fp_t
  __floatsisf(int a) {
      
@@ -57,3 +55,10 @@ __floatsisf(int a) {
      // Insert the sign bit and return
      return fromRep(result | sign);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2f(int a) {
+  return __floatsisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floattitf.c b/lib/libcompiler_rt/floattitf.c

new file mode 100644 (file)

index 0000000..994fded
--- /dev/null
+++ b/lib/libcompiler_rt/floattitf.c
@@ -0,0 +1,82 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(ti_int) * CHAR_BIT;
+    const ti_int s = a >> (N-1);
+    a = (a ^ s) - s;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (s & 0x8000000000000000LL)           /* sign */
+                  | (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/libcompiler_rt/floatundidf.c b/lib/libcompiler_rt/floatundidf.c

index cfd3a7a..6c1a931 100644 (file)
--- a/lib/libcompiler_rt/floatundidf.c
+++ b/lib/libcompiler_rt/floatundidf.c
@@ -22,8 +22,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(ul2d, floatundidf)
-
  #ifndef __SOFT_FP__
  /* Support for systems that have hardware floating-point; we'll set the inexact flag
   * as a side-effect of this computation.
@@ -104,3 +102,10 @@ __floatundidf(du_int a)
      return fb.f;
  }
  #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_ul2d(du_int a) {
+  return __floatundidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatundisf.c b/lib/libcompiler_rt/floatundisf.c

index 713a44a..86841a7 100644 (file)
--- a/lib/libcompiler_rt/floatundisf.c
+++ b/lib/libcompiler_rt/floatundisf.c
@@ -22,8 +22,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(ul2f, floatundisf)
-
  COMPILER_RT_ABI float
  __floatundisf(du_int a)
  {
@@ -75,3 +73,10 @@ __floatundisf(du_int a)
             ((su_int)a & 0x007FFFFF);  /* mantissa */
      return fb.f;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_ul2f(du_int a) {
+  return __floatundisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatunsidf.c b/lib/libcompiler_rt/floatunsidf.c

index 445e180..8d48071 100644 (file)
--- a/lib/libcompiler_rt/floatunsidf.c
+++ b/lib/libcompiler_rt/floatunsidf.c
@@ -18,8 +18,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(ui2d, floatunsidf)
-
  COMPILER_RT_ABI fp_t
  __floatunsidf(unsigned int a) {
      
@@ -40,3 +38,10 @@ __floatunsidf(unsigned int a) {
      result += (rep_t)(exponent + exponentBias) << significandBits;
      return fromRep(result);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) {
+  return __floatunsidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatunsisf.c b/lib/libcompiler_rt/floatunsisf.c

index ea6f161..f194c04 100644 (file)
--- a/lib/libcompiler_rt/floatunsisf.c
+++ b/lib/libcompiler_rt/floatunsisf.c
@@ -18,8 +18,6 @@
  
  #include "int_lib.h"
  
-ARM_EABI_FNALIAS(ui2f, floatunsisf)
-
  COMPILER_RT_ABI fp_t
  __floatunsisf(unsigned int a) {
      
@@ -48,3 +46,10 @@ __floatunsisf(unsigned int a) {
      result += (rep_t)(exponent + exponentBias) << significandBits;
      return fromRep(result);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) {
+  return __floatunsisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatuntitf.c b/lib/libcompiler_rt/floatuntitf.c

new file mode 100644 (file)

index 0000000..e2518c9
--- /dev/null
+++ b/lib/libcompiler_rt/floatuntitf.c
@@ -0,0 +1,79 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(tu_int) * CHAR_BIT;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/libcompiler_rt/i386/Makefile.mk b/lib/libcompiler_rt/i386/Makefile.mk

deleted file mode 100644 (file)

index f3776a0..0000000
--- a/lib/libcompiler_rt/i386/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := i386
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/int_lib.h b/lib/libcompiler_rt/int_lib.h

index 8dfe567..9a8092d 100644 (file)
--- a/lib/libcompiler_rt/int_lib.h
+++ b/lib/libcompiler_rt/int_lib.h
@@ -30,14 +30,17 @@
  /* ABI macro definitions */
  
  #if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name)         \
-  void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+# ifdef COMPILER_RT_ARMHF_TARGET
+#   define COMPILER_RT_ABI
+# else
+#   define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+# endif
  #else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
  # define COMPILER_RT_ABI
  #endif
  
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
  #ifdef _MSC_VER
  #define ALWAYS_INLINE __forceinline
  #define NOINLINE __declspec(noinline)
@@ -91,14 +94,14 @@ COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
  #include <intrin.h>
  
  uint32_t __inline __builtin_ctz(uint32_t value) {
-  uint32_t trailing_zero = 0;
+  unsigned long trailing_zero = 0;
    if (_BitScanForward(&trailing_zero, value))
      return trailing_zero;
    return 32;
  }
  
  uint32_t __inline __builtin_clz(uint32_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
    if (_BitScanReverse(&leading_zero, value))
      return 31 - leading_zero;
    return 32;
@@ -106,7 +109,7 @@ uint32_t __inline __builtin_clz(uint32_t value) {
  
  #if defined(_M_ARM) || defined(_M_X64)
  uint32_t __inline __builtin_clzll(uint64_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
    if (_BitScanReverse64(&leading_zero, value))
      return 63 - leading_zero;
    return 64;
diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h

index 660385e..a92238c 100644 (file)
--- a/lib/libcompiler_rt/int_types.h
+++ b/lib/libcompiler_rt/int_types.h
@@ -60,9 +60,7 @@ typedef union
      }s;
  } udwords;
  
-/* MIPS64 issue: PR 20098 */
-#if (defined(__LP64__) || defined(__wasm__)) && \
-    !(defined(__mips__) && defined(__clang__))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
  #define CRT_HAS_128BIT
  #endif
  
diff --git a/lib/libcompiler_rt/int_util.c b/lib/libcompiler_rt/int_util.c

index 420d1e2..de87410 100644 (file)
--- a/lib/libcompiler_rt/int_util.c
+++ b/lib/libcompiler_rt/int_util.c
@@ -45,6 +45,16 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) {
    __assert_rtn(function, file, line, "libcompiler_rt abort");
  }
  
+#elif __Fuchsia__
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  __builtin_trap();
+}
+
  #else
  
  /* Get the system definition of abort() */
diff --git a/lib/libcompiler_rt/lshrdi3.c b/lib/libcompiler_rt/lshrdi3.c

index 6b1ea92..becbbef 100644 (file)
--- a/lib/libcompiler_rt/lshrdi3.c
+++ b/lib/libcompiler_rt/lshrdi3.c
@@ -18,8 +18,6 @@
  
  /* Precondition:  0 <= b < bits_in_dword */
  
-ARM_EABI_FNALIAS(llsr, lshrdi3)
-
  COMPILER_RT_ABI di_int
  __lshrdi3(di_int a, si_int b)
  {
@@ -41,3 +39,10 @@ __lshrdi3(di_int a, si_int b)
      }
      return result.all;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) {
+  return __lshrdi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/mingw_fixfloat.c b/lib/libcompiler_rt/mingw_fixfloat.c

new file mode 100644 (file)

index 0000000..c462e0d
--- /dev/null
+++ b/lib/libcompiler_rt/mingw_fixfloat.c
@@ -0,0 +1,36 @@
+/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI di_int __fixdfdi(double a);
+COMPILER_RT_ABI di_int __fixsfdi(float a);
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+COMPILER_RT_ABI double __floatdidf(di_int a);
+COMPILER_RT_ABI float __floatdisf(di_int a);
+COMPILER_RT_ABI double __floatundidf(du_int a);
+COMPILER_RT_ABI float __floatundisf(du_int a);
+
+COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
+
+COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
+
+COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
+
+COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
+
+COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
+
+COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
+
+COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
+
+COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
diff --git a/lib/libcompiler_rt/muldf3.c b/lib/libcompiler_rt/muldf3.c

index 1eb7338..59a6019 100644 (file)
--- a/lib/libcompiler_rt/muldf3.c
+++ b/lib/libcompiler_rt/muldf3.c
@@ -15,8 +15,13 @@
  #define DOUBLE_PRECISION
  #include "fp_mul_impl.inc"
  
-ARM_EABI_FNALIAS(dmul, muldf3)
-
  COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
      return __mulXf3__(a, b);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) {
+  return __muldf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/muldi3.c b/lib/libcompiler_rt/muldi3.c

index 2dae44c..6818a9e 100644 (file)
--- a/lib/libcompiler_rt/muldi3.c
+++ b/lib/libcompiler_rt/muldi3.c
@@ -40,8 +40,6 @@ __muldsi3(su_int a, su_int b)
  
  /* Returns: a * b */
  
-ARM_EABI_FNALIAS(lmul, muldi3)
-
  COMPILER_RT_ABI di_int
  __muldi3(di_int a, di_int b)
  {
@@ -54,3 +52,10 @@ __muldi3(di_int a, di_int b)
      r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
      return r.all;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) {
+  return __muldi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/mulsf3.c b/lib/libcompiler_rt/mulsf3.c

index 478b3bc..f141af1 100644 (file)
--- a/lib/libcompiler_rt/mulsf3.c
+++ b/lib/libcompiler_rt/mulsf3.c
@@ -15,8 +15,13 @@
  #define SINGLE_PRECISION
  #include "fp_mul_impl.inc"
  
-ARM_EABI_FNALIAS(fmul, mulsf3)
-
  COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
      return __mulXf3__(a, b);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) {
+  return __mulsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/negdf2.c b/lib/libcompiler_rt/negdf2.c

index d634b42..5e2544c 100644 (file)
--- a/lib/libcompiler_rt/negdf2.c
+++ b/lib/libcompiler_rt/negdf2.c
@@ -14,9 +14,14 @@
  #define DOUBLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(dneg, negdf2)
-
  COMPILER_RT_ABI fp_t
  __negdf2(fp_t a) {
      return fromRep(toRep(a) ^ signBit);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) {
+  return __negdf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/negsf2.c b/lib/libcompiler_rt/negsf2.c

index 29c17be..f90b343 100644 (file)
--- a/lib/libcompiler_rt/negsf2.c
+++ b/lib/libcompiler_rt/negsf2.c
@@ -14,9 +14,14 @@
  #define SINGLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(fneg, negsf2)
-
  COMPILER_RT_ABI fp_t
  __negsf2(fp_t a) {
      return fromRep(toRep(a) ^ signBit);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) {
+  return __negsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ppc/Makefile.mk b/lib/libcompiler_rt/ppc/Makefile.mk

deleted file mode 100644 (file)

index 0adc623..0000000
--- a/lib/libcompiler_rt/ppc/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := ppc
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/subdf3.c b/lib/libcompiler_rt/subdf3.c

index 7a79e5e..38340df 100644 (file)
--- a/lib/libcompiler_rt/subdf3.c
+++ b/lib/libcompiler_rt/subdf3.c
@@ -15,11 +15,15 @@
  #define DOUBLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(dsub, subdf3)
-
  // Subtraction; flip the sign bit of b and add.
  COMPILER_RT_ABI fp_t
  __subdf3(fp_t a, fp_t b) {
      return __adddf3(a, fromRep(toRep(b) ^ signBit));
  }
  
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) {
+  return __subdf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/subsf3.c b/lib/libcompiler_rt/subsf3.c

index c3b8514..34276b1 100644 (file)
--- a/lib/libcompiler_rt/subsf3.c
+++ b/lib/libcompiler_rt/subsf3.c
@@ -15,11 +15,15 @@
  #define SINGLE_PRECISION
  #include "fp_lib.h"
  
-ARM_EABI_FNALIAS(fsub, subsf3)
-
  // Subtraction; flip the sign bit of b and add.
  COMPILER_RT_ABI fp_t
  __subsf3(fp_t a, fp_t b) {
      return __addsf3(a, fromRep(toRep(b) ^ signBit));
  }
  
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) {
+  return __subsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncdfhf2.c b/lib/libcompiler_rt/truncdfhf2.c

index 17195cd..4bb71aa 100644 (file)
--- a/lib/libcompiler_rt/truncdfhf2.c
+++ b/lib/libcompiler_rt/truncdfhf2.c
@@ -11,8 +11,13 @@
  #define DST_HALF
  #include "fp_trunc_impl.inc"
  
-ARM_EABI_FNALIAS(d2h, truncdfhf2)
-
  COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
      return __truncXfYf2__(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_d2h(double a) {
+  return __truncdfhf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncdfsf2.c b/lib/libcompiler_rt/truncdfsf2.c

index 46ec11d..8bf58bb 100644 (file)
--- a/lib/libcompiler_rt/truncdfsf2.c
+++ b/lib/libcompiler_rt/truncdfsf2.c
@@ -11,8 +11,13 @@
  #define DST_SINGLE
  #include "fp_trunc_impl.inc"
  
-ARM_EABI_FNALIAS(d2f, truncdfsf2)
-
  COMPILER_RT_ABI float __truncdfsf2(double a) {
      return __truncXfYf2__(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_d2f(double a) {
+  return __truncdfsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncsfhf2.c b/lib/libcompiler_rt/truncsfhf2.c

index 9d61895..f6ce1fa 100644 (file)
--- a/lib/libcompiler_rt/truncsfhf2.c
+++ b/lib/libcompiler_rt/truncsfhf2.c
@@ -11,8 +11,6 @@
  #define DST_HALF
  #include "fp_trunc_impl.inc"
  
-ARM_EABI_FNALIAS(f2h, truncsfhf2)
-
  // Use a forwarding definition and noinline to implement a poor man's alias,
  // as there isn't a good cross-platform way of defining one.
  COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
@@ -22,3 +20,10 @@ COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
  COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
      return __truncsfhf2(a);
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_f2h(float a) {
+  return __truncsfhf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/udivsi3.c b/lib/libcompiler_rt/udivsi3.c

index 5d0140c..8eccf10 100644 (file)
--- a/lib/libcompiler_rt/udivsi3.c
+++ b/lib/libcompiler_rt/udivsi3.c
@@ -18,8 +18,6 @@
  
  /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
  
-ARM_EABI_FNALIAS(uidiv, udivsi3)
-
  /* This function should not call __divsi3! */
  COMPILER_RT_ABI su_int
  __udivsi3(su_int n, su_int d)
@@ -64,3 +62,10 @@ __udivsi3(su_int n, su_int d)
      q = (q << 1) | carry;
      return q;
  }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) {
+  return __udivsi3(n, d);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/x86_64/Makefile.mk b/lib/libcompiler_rt/x86_64/Makefile.mk

deleted file mode 100644 (file)

index 83848dd..0000000
--- a/lib/libcompiler_rt/x86_64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := x86_64 x86_64h
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/x86_64/floatdidf.c b/lib/libcompiler_rt/x86_64/floatdidf.c

index 388404e..dead0ed 100644 (file)
--- a/lib/libcompiler_rt/x86_64/floatdidf.c
+++ b/lib/libcompiler_rt/x86_64/floatdidf.c
@@ -4,7 +4,7 @@
  
  /* double __floatdidf(di_int a); */
  
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
  
  #include "../int_lib.h"
  
diff --git a/lib/libcompiler_rt/x86_64/floatdisf.c b/lib/libcompiler_rt/x86_64/floatdisf.c

index 96c3728..99d5621 100644 (file)
--- a/lib/libcompiler_rt/x86_64/floatdisf.c
+++ b/lib/libcompiler_rt/x86_64/floatdisf.c
@@ -2,7 +2,7 @@
   * License. See LICENSE.TXT for details.
   */
  
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
  
  #include "../int_lib.h"
author	patrick <patrick@openbsd.org>
	Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)
committer	patrick <patrick@openbsd.org>
	Tue, 26 Dec 2017 20:59:44 +0000 (20:59 +0000)
lib/libcompiler_rt/CMakeLists.txt		patch \| blob \| history
lib/libcompiler_rt/Makefile		patch \| blob \| history
lib/libcompiler_rt/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/README.txt		patch \| blob \| history
lib/libcompiler_rt/adddf3.c		patch \| blob \| history
lib/libcompiler_rt/addsf3.c		patch \| blob \| history
lib/libcompiler_rt/arm/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/arm/adddf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/addsf3.S	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/arm/addsf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_cdcmp.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_cfcmp.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_dcmp.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_div0.c		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_drsub.c		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_fcmp.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_frsub.c		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_idivmod.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_ldivmod.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_memset.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_uidivmod.S		patch \| blob \| history
lib/libcompiler_rt/arm/aeabi_uldivmod.S		patch \| blob \| history
lib/libcompiler_rt/arm/comparesf2.S		patch \| blob \| history
lib/libcompiler_rt/arm/divdf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/divsf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/divsi3.S		patch \| blob \| history
lib/libcompiler_rt/arm/eqdf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/eqsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/extendsfdf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/fixdfsivfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/fixsfsivfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/fixunsdfsivfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/fixunssfsivfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/floatsidfvfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/floatsisfvfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/floatunssidfvfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/floatunssisfvfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/gedf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/gesf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/gtdf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/gtsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/ledf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/lesf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/ltdf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/ltsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/muldf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/mulsf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/nedf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/negdf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/negsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/nesf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/subdf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/subsf3vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/truncdfsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/udivsi3.S		patch \| blob \| history
lib/libcompiler_rt/arm/unorddf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm/unordsf2vfp.S		patch \| blob \| history
lib/libcompiler_rt/arm64/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/armv6m/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/ashldi3.c		patch \| blob \| history
lib/libcompiler_rt/ashrdi3.c		patch \| blob \| history
lib/libcompiler_rt/assembly.h		patch \| blob \| history
lib/libcompiler_rt/atomic.c		patch \| blob \| history
lib/libcompiler_rt/bswapdi2.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/bswapsi2.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/clear_cache.c		patch \| blob \| history
lib/libcompiler_rt/comparedf2.c		patch \| blob \| history
lib/libcompiler_rt/comparesf2.c		patch \| blob \| history
lib/libcompiler_rt/cpu_model.c		patch \| blob \| history
lib/libcompiler_rt/divdf3.c		patch \| blob \| history
lib/libcompiler_rt/divsf3.c		patch \| blob \| history
lib/libcompiler_rt/divsi3.c		patch \| blob \| history
lib/libcompiler_rt/divtc3.c		patch \| blob \| history
lib/libcompiler_rt/emutls.c		patch \| blob \| history
lib/libcompiler_rt/extendhfsf2.c		patch \| blob \| history
lib/libcompiler_rt/extendsfdf2.c		patch \| blob \| history
lib/libcompiler_rt/ffssi2.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/fixdfdi.c		patch \| blob \| history
lib/libcompiler_rt/fixdfsi.c		patch \| blob \| history
lib/libcompiler_rt/fixsfdi.c		patch \| blob \| history
lib/libcompiler_rt/fixsfsi.c		patch \| blob \| history
lib/libcompiler_rt/fixunsdfdi.c		patch \| blob \| history
lib/libcompiler_rt/fixunsdfsi.c		patch \| blob \| history
lib/libcompiler_rt/fixunssfdi.c		patch \| blob \| history
lib/libcompiler_rt/fixunssfsi.c		patch \| blob \| history
lib/libcompiler_rt/floatdidf.c		patch \| blob \| history
lib/libcompiler_rt/floatdisf.c		patch \| blob \| history
lib/libcompiler_rt/floatsidf.c		patch \| blob \| history
lib/libcompiler_rt/floatsisf.c		patch \| blob \| history
lib/libcompiler_rt/floattitf.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/floatundidf.c		patch \| blob \| history
lib/libcompiler_rt/floatundisf.c		patch \| blob \| history
lib/libcompiler_rt/floatunsidf.c		patch \| blob \| history
lib/libcompiler_rt/floatunsisf.c		patch \| blob \| history
lib/libcompiler_rt/floatuntitf.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/i386/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/int_lib.h		patch \| blob \| history
lib/libcompiler_rt/int_types.h		patch \| blob \| history
lib/libcompiler_rt/int_util.c		patch \| blob \| history
lib/libcompiler_rt/lshrdi3.c		patch \| blob \| history
lib/libcompiler_rt/mingw_fixfloat.c	[new file with mode: 0644]	patch \| blob
lib/libcompiler_rt/muldf3.c		patch \| blob \| history
lib/libcompiler_rt/muldi3.c		patch \| blob \| history
lib/libcompiler_rt/mulsf3.c		patch \| blob \| history
lib/libcompiler_rt/negdf2.c		patch \| blob \| history
lib/libcompiler_rt/negsf2.c		patch \| blob \| history
lib/libcompiler_rt/ppc/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/subdf3.c		patch \| blob \| history
lib/libcompiler_rt/subsf3.c		patch \| blob \| history
lib/libcompiler_rt/truncdfhf2.c		patch \| blob \| history
lib/libcompiler_rt/truncdfsf2.c		patch \| blob \| history
lib/libcompiler_rt/truncsfhf2.c		patch \| blob \| history
lib/libcompiler_rt/udivsi3.c		patch \| blob \| history
lib/libcompiler_rt/x86_64/Makefile.mk	[deleted file]	patch \| blob \| history
lib/libcompiler_rt/x86_64/floatdidf.c		patch \| blob \| history
lib/libcompiler_rt/x86_64/floatdisf.c		patch \| blob \| history