From: patrick <patrick@openbsd.org>
Date: Tue, 26 Dec 2017 20:59:44 +0000 (+0000)
Subject: Update to compiler-rt 5.0.1.
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=c72a1c0960669bfd1eb87723f902bc05ee727707;p=openbsd

Update to compiler-rt 5.0.1.

ok kettenis@
---

diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt
index 44a660f5d49..f0d3f50714c 100644
--- a/lib/libcompiler_rt/CMakeLists.txt
+++ b/lib/libcompiler_rt/CMakeLists.txt
@@ -13,6 +13,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     "${CMAKE_SOURCE_DIR}/../../cmake/Modules")
   include(base-config-ix)
   include(CompilerRTUtils)
+
+  load_llvm_config()
+  construct_compiler_rt_default_triple()
+
   if(APPLE)
     include(CompilerRTDarwinUtils)
   endif()
@@ -38,7 +42,8 @@ set(GENERIC_SOURCES
   ashlti3.c
   ashrdi3.c
   ashrti3.c
-  clear_cache.c
+  bswapdi2.c
+  bswapsi2.c
   clzdi2.c
   clzsi2.c
   clzti2.c
@@ -62,11 +67,10 @@ set(GENERIC_SOURCES
   divti3.c
   divtf3.c
   divxc3.c
-  enable_execute_stack.c
-  eprintf.c
   extendsfdf2.c
   extendhfsf2.c
   ffsdi2.c
+  ffssi2.c
   ffsti2.c
   fixdfdi.c
   fixdfsi.c
@@ -128,6 +132,7 @@ set(GENERIC_SOURCES
   negvdi2.c
   negvsi2.c
   negvti2.c
+  os_version_check.c
   paritydi2.c
   paritysi2.c
   parityti2.c
@@ -160,20 +165,43 @@ set(GENERIC_SOURCES
   umodsi3.c
   umodti3.c)
 
-if(COMPILER_RT_SUPPORTS_ATOMIC_KEYWORD)
+set(GENERIC_TF_SOURCES
+  comparetf2.c
+  extenddftf2.c
+  extendsftf2.c
+  fixtfdi.c
+  fixtfsi.c
+  fixtfti.c
+  fixunstfdi.c
+  fixunstfsi.c
+  fixunstfti.c
+  floatditf.c
+  floatsitf.c
+  floattitf.c
+  floatunditf.c
+  floatunsitf.c
+  floatuntitf.c
+  multc3.c
+  trunctfdf2.c
+  trunctfsf2.c)
+
+option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
+  "Skip the atomic builtin (this may be needed if system headers are unavailable)"
+  Off)
+
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
-    atomic.c)
+    emutls.c 
+    enable_execute_stack.c
+    eprintf.c)
 endif()
 
-set(MSVC_SOURCES
- divsc3.c
- divdc3.c
- divxc3.c
- mulsc3.c
- muldc3.c
- mulxc3.c)
-
+if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    atomic.c)
+endif()
 
 if(APPLE)
   set(GENERIC_SOURCES
@@ -186,18 +214,18 @@ if(APPLE)
     atomic_thread_fence.c)
 endif()
 
-if(NOT WIN32 OR MINGW)
-  set(GENERIC_SOURCES
-      ${GENERIC_SOURCES}
-      emutls.c)
-endif()
-
 if (HAVE_UNWIND_H)
   set(GENERIC_SOURCES
       ${GENERIC_SOURCES}
       gcc_personality_v0.c)
 endif ()
 
+if (NOT FUCHSIA)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    clear_cache.c)
+endif()
+
 if (NOT MSVC)
   set(x86_64_SOURCES
       x86_64/chkstk.S
@@ -254,15 +282,54 @@ else () # MSVC
       x86_64/floatdidf.c
       x86_64/floatdisf.c
       x86_64/floatdixf.c
-      ${MSVC_SOURCES})
+      ${GENERIC_SOURCES})
   set(x86_64h_SOURCES ${x86_64_SOURCES})
-  set(i386_SOURCES ${MSVC_SOURCES})
+  set(i386_SOURCES ${GENERIC_SOURCES})
   set(i686_SOURCES ${i386_SOURCES})
 endif () # if (NOT MSVC)
 
 set(arm_SOURCES
-  arm/adddf3vfp.S
-  arm/addsf3vfp.S
+  arm/bswapdi2.S
+  arm/bswapsi2.S
+  arm/clzdi2.S
+  arm/clzsi2.S
+  arm/comparesf2.S
+  arm/divmodsi4.S
+  arm/divsi3.S
+  arm/modsi3.S
+  arm/sync_fetch_and_add_4.S
+  arm/sync_fetch_and_add_8.S
+  arm/sync_fetch_and_and_4.S
+  arm/sync_fetch_and_and_8.S
+  arm/sync_fetch_and_max_4.S
+  arm/sync_fetch_and_max_8.S
+  arm/sync_fetch_and_min_4.S
+  arm/sync_fetch_and_min_8.S
+  arm/sync_fetch_and_nand_4.S
+  arm/sync_fetch_and_nand_8.S
+  arm/sync_fetch_and_or_4.S
+  arm/sync_fetch_and_or_8.S
+  arm/sync_fetch_and_sub_4.S
+  arm/sync_fetch_and_sub_8.S
+  arm/sync_fetch_and_umax_4.S
+  arm/sync_fetch_and_umax_8.S
+  arm/sync_fetch_and_umin_4.S
+  arm/sync_fetch_and_umin_8.S
+  arm/sync_fetch_and_xor_4.S
+  arm/sync_fetch_and_xor_8.S
+  arm/udivmodsi4.S
+  arm/udivsi3.S
+  arm/umodsi3.S
+  ${GENERIC_SOURCES})
+
+set(thumb1_SOURCES
+  arm/divsi3.S
+  arm/udivsi3.S
+  arm/comparesf2.S
+  arm/addsf3.S
+  ${GENERIC_SOURCES})
+
+set(arm_EABI_SOURCES
   arm/aeabi_cdcmp.S
   arm/aeabi_cdcmpeq_check_nan.c
   arm/aeabi_cfcmp.S
@@ -279,16 +346,21 @@ set(arm_SOURCES
   arm/aeabi_memmove.S
   arm/aeabi_memset.S
   arm/aeabi_uidivmod.S
-  arm/aeabi_uldivmod.S
-  arm/bswapdi2.S
-  arm/bswapsi2.S
-  arm/clzdi2.S
-  arm/clzsi2.S
-  arm/comparesf2.S
+  arm/aeabi_uldivmod.S)
+
+set(arm_Thumb1_JT_SOURCES
+  arm/switch16.S
+  arm/switch32.S
+  arm/switch8.S
+  arm/switchu8.S)
+set(arm_Thumb1_SjLj_EH_SOURCES
+  arm/restore_vfp_d8_d15_regs.S
+  arm/save_vfp_d8_d15_regs.S)
+set(arm_Thumb1_VFPv2_SOURCES
+  arm/adddf3vfp.S
+  arm/addsf3vfp.S
   arm/divdf3vfp.S
-  arm/divmodsi4.S
   arm/divsf3vfp.S
-  arm/divsi3.S
   arm/eqdf2vfp.S
   arm/eqsf2vfp.S
   arm/extendsfdf2vfp.S
@@ -308,67 +380,64 @@ set(arm_SOURCES
   arm/lesf2vfp.S
   arm/ltdf2vfp.S
   arm/ltsf2vfp.S
-  arm/modsi3.S
   arm/muldf3vfp.S
   arm/mulsf3vfp.S
   arm/nedf2vfp.S
   arm/negdf2vfp.S
   arm/negsf2vfp.S
   arm/nesf2vfp.S
-  arm/restore_vfp_d8_d15_regs.S
-  arm/save_vfp_d8_d15_regs.S
   arm/subdf3vfp.S
   arm/subsf3vfp.S
-  arm/switch16.S
-  arm/switch32.S
-  arm/switch8.S
-  arm/switchu8.S
-  arm/sync_fetch_and_add_4.S
-  arm/sync_fetch_and_add_8.S
-  arm/sync_fetch_and_and_4.S
-  arm/sync_fetch_and_and_8.S
-  arm/sync_fetch_and_max_4.S
-  arm/sync_fetch_and_max_8.S
-  arm/sync_fetch_and_min_4.S
-  arm/sync_fetch_and_min_8.S
-  arm/sync_fetch_and_nand_4.S
-  arm/sync_fetch_and_nand_8.S
-  arm/sync_fetch_and_or_4.S
-  arm/sync_fetch_and_or_8.S
-  arm/sync_fetch_and_sub_4.S
-  arm/sync_fetch_and_sub_8.S
-  arm/sync_fetch_and_umax_4.S
-  arm/sync_fetch_and_umax_8.S
-  arm/sync_fetch_and_umin_4.S
-  arm/sync_fetch_and_umin_8.S
-  arm/sync_fetch_and_xor_4.S
-  arm/sync_fetch_and_xor_8.S
-  arm/sync_synchronize.S
   arm/truncdfsf2vfp.S
-  arm/udivmodsi4.S
-  arm/udivsi3.S
-  arm/umodsi3.S
   arm/unorddf2vfp.S
-  arm/unordsf2vfp.S
-  ${GENERIC_SOURCES})
+  arm/unordsf2vfp.S)
+set(arm_Thumb1_icache_SOURCES
+  arm/sync_synchronize.S)
+set(arm_Thumb1_SOURCES
+  ${arm_Thumb1_JT_SOURCES}
+  ${arm_Thumb1_SjLj_EH_SOURCES}
+  ${arm_Thumb1_VFPv2_SOURCES}
+  ${arm_Thumb1_icache_SOURCES})
+
+if(MINGW)
+  set(arm_SOURCES
+      arm/aeabi_idivmod.S
+      arm/aeabi_ldivmod.S
+      arm/aeabi_uidivmod.S
+      arm/aeabi_uldivmod.S
+      divmoddi4.c
+      divmodsi4.c
+      divdi3.c
+      divsi3.c
+      fixdfdi.c
+      fixsfdi.c
+      fixunsdfdi.c
+      fixunssfdi.c
+      floatdidf.c
+      floatdisf.c
+      floatundidf.c
+      floatundisf.c
+      mingw_fixfloat.c
+      moddi3.c
+      udivmoddi4.c
+      udivmodsi4.c
+      udivsi3.c
+      umoddi3.c
+      emutls.c)
+elseif(NOT WIN32)
+  # TODO the EABI sources should only be added to EABI targets
+  set(arm_SOURCES
+    ${arm_SOURCES}
+    ${arm_EABI_SOURCES}
+    ${arm_Thumb1_SOURCES})
+
+  set(thumb1_SOURCES
+    ${thumb1_SOURCES}
+    ${arm_EABI_SOURCES})
+endif()
 
 set(aarch64_SOURCES
-  comparetf2.c
-  extenddftf2.c
-  extendsftf2.c
-  fixtfdi.c
-  fixtfsi.c
-  fixtfti.c
-  fixunstfdi.c
-  fixunstfsi.c
-  fixunstfti.c
-  floatditf.c
-  floatsitf.c
-  floatunditf.c
-  floatunsitf.c
-  multc3.c
-  trunctfdf2.c
-  trunctfsf2.c
+  ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES})
 
 set(armhf_SOURCES ${arm_SOURCES})
@@ -378,14 +447,16 @@ set(armv7k_SOURCES ${arm_SOURCES})
 set(arm64_SOURCES ${aarch64_SOURCES})
 
 # macho_embedded archs
-set(armv6m_SOURCES ${GENERIC_SOURCES})
+set(armv6m_SOURCES ${thumb1_SOURCES})
 set(armv7m_SOURCES ${arm_SOURCES})
 set(armv7em_SOURCES ${arm_SOURCES})
 
 set(mips_SOURCES ${GENERIC_SOURCES})
 set(mipsel_SOURCES ${mips_SOURCES})
-set(mips64_SOURCES ${mips_SOURCES})
-set(mips64el_SOURCES ${mips_SOURCES})
+set(mips64_SOURCES ${GENERIC_TF_SOURCES}
+                   ${mips_SOURCES})
+set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
+                     ${mips_SOURCES})
 
 set(wasm32_SOURCES ${GENERIC_SOURCES})
 set(wasm64_SOURCES ${GENERIC_SOURCES})
@@ -398,26 +469,57 @@ if (APPLE)
   add_subdirectory(macho_embedded)
   darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
 else ()
-  append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99)
+  set(BUILTIN_CFLAGS "")
+
+  append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
+
+  # These flags would normally be added to CMAKE_C_FLAGS by the llvm
+  # cmake step. Add them manually if this is a standalone build.
+  if(COMPILER_RT_STANDALONE_BUILD)
+    append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS)
+    append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS)
+    if(NOT COMPILER_RT_DEBUG)
+      append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS)
+    endif()
+  endif()
+
+  set(BUILTIN_DEFS "")
+
+  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS)
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
+      # NOTE: some architectures (e.g. i386) have multiple names.  Ensure that
+      # we catch them all.
+      set(_arch ${arch})
+      if("${arch}" STREQUAL "i686")
+        set(_arch "i386|i686")
+      endif()
+
       # Filter out generic versions of routines that are re-implemented in
       # architecture specific manner.  This prevents multiple definitions of the
       # same symbols, making the symbol selection non-deterministic.
       foreach (_file ${${arch}_SOURCES})
-        if (${_file} MATCHES ${arch}/*)
+        if (${_file} MATCHES ${_arch}/*)
           get_filename_component(_name ${_file} NAME)
           string(REPLACE ".S" ".c" _cname "${_name}")
           list(REMOVE_ITEM ${arch}_SOURCES ${_cname})
         endif ()
       endforeach ()
 
+      # Needed for clear_cache on debug mode, due to r7's usage in inline asm.
+      # Release mode already sets it via -O2/3, Debug mode doesn't.
+      if (${arch} STREQUAL "armhf")
+        list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
+      endif()
+
       add_compiler_rt_runtime(clang_rt.builtins
                               STATIC
                               ARCHS ${arch}
                               SOURCES ${${arch}_SOURCES}
-                              CFLAGS ${maybe_stdc99}
+                              DEFS ${BUILTIN_DEFS}
+                              CFLAGS ${BUILTIN_CFLAGS}
                               PARENT_TARGET builtins)
     endif ()
   endforeach ()
diff --git a/lib/libcompiler_rt/Makefile b/lib/libcompiler_rt/Makefile
index 3791940ef38..a12a1fa9bc4 100644
--- a/lib/libcompiler_rt/Makefile
+++ b/lib/libcompiler_rt/Makefile
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile,v 1.10 2017/09/08 19:04:00 naddy Exp $
+# $OpenBSD: Makefile,v 1.11 2017/12/26 20:59:44 patrick Exp $
 
 .include <bsd.own.mk>
 
@@ -41,6 +41,8 @@ GEN_SRCS=	absvdi2 \
 		ashrdi3 \
 		ashrti3 \
 		atomic \
+		bswapdi2 \
+		bswapsi2 \
 		clear_cache \
 		clzdi2 \
 		clzsi2 \
@@ -71,6 +73,7 @@ GEN_SRCS=	absvdi2 \
 		extendsfdf2 \
 		extendhfsf2 \
 		ffsdi2 \
+		ffssi2 \
 		ffsti2 \
 		fixdfdi \
 		fixdfsi \
@@ -195,8 +198,10 @@ SRCS+=	comparetf2.c \
 	fixunstfti.c \
 	floatditf.c \
 	floatsitf.c \
-	floatunsitf.c \
+	floattitf.c \
 	floatunditf.c \
+	floatunsitf.c \
+	floatuntitf.c \
 	multc3.c \
 	trunctfdf2.c \
 	trunctfsf2.c
@@ -220,8 +225,6 @@ SRCS+=	aeabi_cdcmp.S \
 	aeabi_memset.S \
 	aeabi_uidivmod.S \
 	aeabi_uldivmod.S \
-	bswapdi2.S \
-	bswapsi2.S \
 	switch16.S \
 	switch32.S \
 	switch8.S \
diff --git a/lib/libcompiler_rt/Makefile.mk b/lib/libcompiler_rt/Makefile.mk
deleted file mode 100644
index 00e2f53fc40..00000000000
--- a/lib/libcompiler_rt/Makefile.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#===- lib/builtins/Makefile.mk -----------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-
-# Add arch specific optimized implementations.
-SubDirs += i386 ppc x86_64 arm armv6m
-
-# Add ARM64 dir.
-SubDirs += arm64
-
-# Define the variables for this specific directory.
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o)
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/README.txt b/lib/libcompiler_rt/README.txt
index ad36e4e5279..e603dfa0535 100644
--- a/lib/libcompiler_rt/README.txt
+++ b/lib/libcompiler_rt/README.txt
@@ -45,6 +45,7 @@ si_int __ctzsi2(si_int a);  // count trailing zeros
 si_int __ctzdi2(di_int a);  // count trailing zeros
 si_int __ctzti2(ti_int a);  // count trailing zeros
 
+si_int __ffssi2(si_int a);  // find least significant 1 bit
 si_int __ffsdi2(di_int a);  // find least significant 1 bit
 si_int __ffsti2(ti_int a);  // find least significant 1 bit
 
@@ -56,8 +57,8 @@ si_int __popcountsi2(si_int a);  // bit population
 si_int __popcountdi2(di_int a);  // bit population
 si_int __popcountti2(ti_int a);  // bit population
 
-uint32_t __bswapsi2(uint32_t a);   // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a);   // a byteswapped, arm only
+uint32_t __bswapsi2(uint32_t a);   // a byteswapped
+uint64_t __bswapdi2(uint64_t a);   // a byteswapped
 
 // Integral arithmetic
 
diff --git a/lib/libcompiler_rt/adddf3.c b/lib/libcompiler_rt/adddf3.c
index 8b7aae0a6f8..c528e9e21f5 100644
--- a/lib/libcompiler_rt/adddf3.c
+++ b/lib/libcompiler_rt/adddf3.c
@@ -15,8 +15,13 @@
 #define DOUBLE_PRECISION
 #include "fp_add_impl.inc"
 
-ARM_EABI_FNALIAS(dadd, adddf3)
-
 COMPILER_RT_ABI double __adddf3(double a, double b){
     return __addXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_dadd(double a, double b) {
+  return __adddf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/addsf3.c b/lib/libcompiler_rt/addsf3.c
index 0f5d6ea4097..fe570687a25 100644
--- a/lib/libcompiler_rt/addsf3.c
+++ b/lib/libcompiler_rt/addsf3.c
@@ -15,8 +15,13 @@
 #define SINGLE_PRECISION
 #include "fp_add_impl.inc"
 
-ARM_EABI_FNALIAS(fadd, addsf3)
-
 COMPILER_RT_ABI float __addsf3(float a, float b) {
     return __addXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_fadd(float a, float b) {
+  return __addsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/arm/Makefile.mk b/lib/libcompiler_rt/arm/Makefile.mk
deleted file mode 100644
index ed2e8323e39..00000000000
--- a/lib/libcompiler_rt/arm/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/arm/adddf3vfp.S b/lib/libcompiler_rt/arm/adddf3vfp.S
index f4c00a03e05..8e476cad162 100644
--- a/lib/libcompiler_rt/arm/adddf3vfp.S
+++ b/lib/libcompiler_rt/arm/adddf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
 	vadd.f64 d6, d6, d7		
 	vmov	r0, r1, d6		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__adddf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/addsf3.S b/lib/libcompiler_rt/arm/addsf3.S
new file mode 100644
index 00000000000..362b5c147ea
--- /dev/null
+++ b/lib/libcompiler_rt/arm/addsf3.S
@@ -0,0 +1,277 @@
+/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===//
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 (single precision floating pointer number
+ * addition with the IEEE-754 default rounding (to nearest, ties to even)
+ * function for the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23
+#define typeWidth 32
+
+	.syntax unified
+	.text
+  .thumb
+  .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+  push {r4, r5, r6, r7, lr}
+  // Get the absolute value of a and b.
+  lsls r2, r0, #1
+  lsls r3, r1, #1
+  lsrs r2, r2, #1  /* aAbs */
+  beq  LOCAL_LABEL(a_zero_nan_inf)
+  lsrs r3, r3, #1  /* bAbs */
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Detect if a or b is infinity or Nan.
+  lsrs r6, r2, #(significandBits)
+  lsrs r7, r3, #(significandBits)
+  cmp  r6, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+  cmp  r7, #0xFF
+  beq  LOCAL_LABEL(zero_nan_inf)
+
+  // Swap Rep and Abs so that a and aAbs has the larger absolute value.
+  cmp r2, r3
+  bhs LOCAL_LABEL(no_swap)
+  movs r4, r0
+  movs r5, r2
+  movs r0, r1
+  movs r2, r3
+  movs r1, r4
+  movs r3, r5
+LOCAL_LABEL(no_swap):
+
+  // Get the significands and shift them to give us round, guard and sticky.
+  lsls r4, r0, #(typeWidth - significandBits)
+  lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+  lsls r5, r1, #(typeWidth - significandBits)
+  lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+  // Get the implicitBit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3)
+
+  // Get aExponent and set implicit bit if necessary.
+  lsrs r2, r2, #(significandBits)
+  beq LOCAL_LABEL(a_done_implicit_bit)
+  orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+  // Get bExponent and set implicit bit if necessary.
+  lsrs r3, r3, #(significandBits)
+  beq LOCAL_LABEL(b_done_implicit_bit)
+  orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+  // Get the difference in exponents.
+  subs r6, r2, r3
+  beq LOCAL_LABEL(done_align)
+
+  // If b is denormal, then a must be normal as align > 0, and we only need to
+  // right shift bSignificand by (align - 1) bits.
+  cmp  r3, #0
+  bne  1f
+  subs r6, r6, #1
+1:
+
+  // No longer needs bExponent. r3 is dead here.
+  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+  movs r3, #(typeWidth)
+  subs r3, r3, r6
+  movs r7, r5
+  lsls r7, r3
+  beq 1f
+  movs r7, #1
+1:
+
+  // bSignificand = bSignificand >> align | sticky;
+  lsrs r5, r6
+  orrs r5, r7
+  bne LOCAL_LABEL(done_align)
+  movs r5, #1 //  sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+  // isSubtraction = (aRep ^ bRep) >> 31;
+  movs r7, r0
+  eors r7, r1
+  lsrs r7, #31
+  bne LOCAL_LABEL(do_substraction)
+
+  // Same sign, do Addition.
+
+  // aSignificand += bSignificand;
+  adds r4, r4, r5
+
+  // Check carry bit.
+  movs r6, #1
+  lsls r6, r6, #(significandBits + 3 + 1)
+  movs r7, r4
+  ands r7, r6
+  beq LOCAL_LABEL(form_result)
+  // If the addition carried up, we need to right-shift the result and
+  // adjust the exponent.
+  movs r7, r4
+  movs r6, #1
+  ands r7, r6 // sticky = aSignificand & 1;
+  lsrs r4, #1
+  orrs r4, r7  // result Significand
+  adds r2, #1  // result Exponent
+  // If we have overflowed the type, return +/- infinity.
+  cmp  r2, 0xFF
+  beq  LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+  // Shift the sign, exponent and significand into place.
+  lsrs r0, #(typeWidth - 1)
+  lsls r0, #(typeWidth - 1) // Get Sign.
+  lsls r2, #(significandBits)
+  orrs r0, r2
+  movs r1, r4
+  lsls r4, #(typeWidth - significandBits - 3)
+  lsrs r4, #(typeWidth - significandBits)
+  orrs r0, r4
+
+  // Final rounding.  The result may overflow to infinity, but that is the
+  // correct result in that case.
+  // roundGuardSticky = aSignificand & 0x7;
+  movs r2, #0x7
+  ands r1, r2
+  // if (roundGuardSticky > 0x4) result++;
+
+  cmp r1, #0x4
+  blt LOCAL_LABEL(done_round)
+  beq 1f
+  adds r0, #1
+  pop {r4, r5, r6, r7, pc}
+1:
+
+  // if (roundGuardSticky == 0x4) result += result & 1;
+  movs r1, r0
+  lsrs r1, #1
+  bcc  LOCAL_LABEL(done_round)
+  adds r0, r0, #1
+LOCAL_LABEL(done_round):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_substraction):
+  subs r4, r4, r5 // aSignificand -= bSignificand;
+  beq  LOCAL_LABEL(ret_zero)
+  movs r6, r4
+  cmp  r2, 0
+  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+  // If partial cancellation occured, we need to left-shift the result
+  // and adjust the exponent:
+  lsrs r6, r6, #(significandBits + 3)
+  bne LOCAL_LABEL(form_result)
+
+  push {r0, r1, r2, r3}
+  movs r0, r4
+  bl   __clzsi2
+  movs r5, r0
+  pop {r0, r1, r2, r3}
+  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+  // aSignificand <<= shift; aExponent -= shift;
+  lsls r4, r5
+  subs  r2, r2, r5
+  bgt LOCAL_LABEL(form_result)
+
+  // Do normalization if aExponent <= 0.
+  movs r6, #1
+  subs r6, r6, r2 // 1 - aExponent;
+  movs r2, #0 // aExponent = 0;
+  movs r3, #(typeWidth) // bExponent is dead.
+  subs r3, r3, r6
+  movs r7, r4
+  lsls r7, r3  // stickyBit = (bool)(aSignificant << (typeWidth - align))
+  beq 1f
+  movs r7, #1
+1:
+  lsrs r4, r6 /* aSignificand >> shift */
+  orrs r4, r7
+  b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+  movs r0, #0
+  pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+  lsrs r3, r3, #1
+
+LOCAL_LABEL(zero_nan_inf):
+  // Here  r2 has aAbs, r3 has bAbs
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits) // Make +inf.
+
+  cmp r2, r4
+  bhi LOCAL_LABEL(a_is_nan)
+  cmp r3, r4
+  bhi LOCAL_LABEL(b_is_nan)
+
+  cmp r2, r4
+  bne LOCAL_LABEL(a_is_rational)
+  // aAbs is INF.
+  eors r1, r0 // aRep ^ bRep.
+  movs r6, #1
+  lsls r6, r6, #(typeWidth - 1) // get sign mask.
+  cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
+  beq LOCAL_LABEL(a_is_nan)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(a_is_rational):
+  cmp r3, r4
+  bne LOCAL_LABEL(b_is_rational)
+  movs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+  // either a or b or both are zero.
+  adds r4, r2, r3
+  beq  LOCAL_LABEL(both_zero)
+  cmp r2, #0 // is absA 0 ?
+  beq LOCAL_LABEL(ret_b)
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(both_zero):
+  ands r0, r1 // +0 + -0 = +0
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+  movs r0, r1
+
+LOCAL_LABEL(ret):
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+  movs r0, r1
+LOCAL_LABEL(a_is_nan):
+  movs r1, #1
+  lsls r1, r1, #(significandBits -1) // r1 is quiet bit.
+  orrs r0, r1
+  pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+  movs r4, #0xFF
+  lsls r4, r4, #(significandBits)
+  orrs r0, r4
+  lsrs r0, r0, #(significandBits)
+  lsls r0, r0, #(significandBits)
+  pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/addsf3vfp.S b/lib/libcompiler_rt/arm/addsf3vfp.S
index af40c1cc92a..8871efdcc5d 100644
--- a/lib/libcompiler_rt/arm/addsf3vfp.S
+++ b/lib/libcompiler_rt/arm/addsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vadd.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vadd.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__addsf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmp.S b/lib/libcompiler_rt/arm/aeabi_cdcmp.S
index 8008f5fca26..3e7a8b86b73 100644
--- a/lib/libcompiler_rt/arm/aeabi_cdcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_cdcmp.S
@@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
         push {r0-r3, lr}
         bl __aeabi_cdcmpeq_check_nan
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cdcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cdcmple can't trap
         bne __aeabi_cdcmple
 
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, #APSR_C
+#endif
         JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
 
 
@@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple)
 
         bl __aeabi_dcmplt
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_dcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
         moveq ip, #0
         beq 1f
 
         ldm sp, {r0-r3}
         bl __aeabi_dcmpeq
         cmp r0, #1
+        ITE(eq)
         moveq ip, #(APSR_C | APSR_Z)
         movne ip, #(APSR_C)
 
 1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, ip
+#endif
         pop {r0-r3}
         POP_PC()
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
 
 // int __aeabi_cdrcmple(double a, double b) {
diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c
index 577f6b2c553..7578433a1df 100644
--- a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c
+++ b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include <stdint.h>
+#include "../int_lib.h"
 
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
 int __aeabi_cdcmpeq_check_nan(double a, double b) {
     return __builtin_isnan(a) || __builtin_isnan(b);
 }
diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmp.S b/lib/libcompiler_rt/arm/aeabi_cfcmp.S
index 274baf7aecf..1f304ffd964 100644
--- a/lib/libcompiler_rt/arm/aeabi_cfcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_cfcmp.S
@@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
         push {r0-r3, lr}
         bl __aeabi_cfcmpeq_check_nan
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        beq 1f
+        // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_cfcmple
+        pop {r0-r3, pc}
+1:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cfcmple can't trap
         bne __aeabi_cfcmple
 
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, #APSR_C
+#endif
         JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
 
 
@@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple)
 
         bl __aeabi_fcmplt
         cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+        bne 1f
+        // Z = 0, C = 0
+        movs r0, #1
+        lsls r0, r0, #1
+        pop {r0-r3, pc}
+1:
+        mov r0, sp
+        ldm r0, {r0-r3}
+        bl __aeabi_fcmpeq
+        cmp r0, #1
+        bne 2f
+        // Z = 1, C = 1
+        movs r0, #2
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+2:
+        // Z = 0, C = 1
+        movs r0, #0xF
+        lsls r0, r0, #31
+        pop {r0-r3, pc}
+#else
+        ITT(eq)
         moveq ip, #0
         beq 1f
 
         ldm sp, {r0-r3}
         bl __aeabi_fcmpeq
         cmp r0, #1
+        ITE(eq)
         moveq ip, #(APSR_C | APSR_Z)
         movne ip, #(APSR_C)
 
 1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, ip
+#endif
         pop {r0-r3}
         POP_PC()
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
 
 // int __aeabi_cfrcmple(float a, float b) {
diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c
index 992e31fbd8d..43dde9a4959 100644
--- a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c
+++ b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include <stdint.h>
+#include "../int_lib.h"
 
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
 int __aeabi_cfcmpeq_check_nan(float a, float b) {
     return __builtin_isnan(a) || __builtin_isnan(b);
 }
diff --git a/lib/libcompiler_rt/arm/aeabi_dcmp.S b/lib/libcompiler_rt/arm/aeabi_dcmp.S
index 43e439268d9..9fa78b46124 100644
--- a/lib/libcompiler_rt/arm/aeabi_dcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_dcmp.S
@@ -18,18 +18,27 @@
 //   }
 // }
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS                    \
+        vmov      d0, r0, r1                     SEPARATOR \
+        vmov      d1, r2, r3
+#else
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
 #define DEFINE_AEABI_DCMP(cond)                            \
         .syntax unified                          SEPARATOR \
         .p2align 2                               SEPARATOR \
 DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
         push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
         bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
 
diff --git a/lib/libcompiler_rt/arm/aeabi_div0.c b/lib/libcompiler_rt/arm/aeabi_div0.c
index ccc95fa5c12..dc3031326e3 100644
--- a/lib/libcompiler_rt/arm/aeabi_div0.c
+++ b/lib/libcompiler_rt/arm/aeabi_div0.c
@@ -26,16 +26,18 @@
  * line.
  */
 
+#include "../int_lib.h"
+
 /* provide an unused declaration to pacify pendantic compilation */
 extern unsigned char declaration;
 
 #if defined(__ARM_EABI__)
-int __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
 __aeabi_idiv0(int return_value) {
   return return_value;
 }
 
-long long __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
 __aeabi_ldiv0(long long return_value) {
   return return_value;
 }
diff --git a/lib/libcompiler_rt/arm/aeabi_drsub.c b/lib/libcompiler_rt/arm/aeabi_drsub.c
index fc17d5a4cc7..1254886086f 100644
--- a/lib/libcompiler_rt/arm/aeabi_drsub.c
+++ b/lib/libcompiler_rt/arm/aeabi_drsub.c
@@ -10,10 +10,10 @@
 #define DOUBLE_PRECISION
 #include "../fp_lib.h"
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_dsub(fp_t, fp_t);
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_drsub(fp_t a, fp_t b) {
     return __aeabi_dsub(b, a);
 }
diff --git a/lib/libcompiler_rt/arm/aeabi_fcmp.S b/lib/libcompiler_rt/arm/aeabi_fcmp.S
index 0a1d92a60b6..ea5b96c21d5 100644
--- a/lib/libcompiler_rt/arm/aeabi_fcmp.S
+++ b/lib/libcompiler_rt/arm/aeabi_fcmp.S
@@ -18,18 +18,27 @@
 //   }
 // }
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS                    \
+        vmov      s0, r0                         SEPARATOR \
+        vmov      s1, r1
+#else
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
 #define DEFINE_AEABI_FCMP(cond)                            \
         .syntax unified                          SEPARATOR \
         .p2align 2                               SEPARATOR \
 DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
         push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
         bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
-        mov       r0, #0                         SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 1:                                               SEPARATOR \
-        mov       r0, #1                         SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
         pop       { r4, pc }                     SEPARATOR \
 END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
 
diff --git a/lib/libcompiler_rt/arm/aeabi_frsub.c b/lib/libcompiler_rt/arm/aeabi_frsub.c
index 64258dc7e07..34f2303745b 100644
--- a/lib/libcompiler_rt/arm/aeabi_frsub.c
+++ b/lib/libcompiler_rt/arm/aeabi_frsub.c
@@ -10,10 +10,10 @@
 #define SINGLE_PRECISION
 #include "../fp_lib.h"
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_fsub(fp_t, fp_t);
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_frsub(fp_t a, fp_t b) {
     return __aeabi_fsub(b, a);
 }
diff --git a/lib/libcompiler_rt/arm/aeabi_idivmod.S b/lib/libcompiler_rt/arm/aeabi_idivmod.S
index 2fcad862f73..0164b15dca1 100644
--- a/lib/libcompiler_rt/arm/aeabi_idivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_idivmod.S
@@ -15,16 +15,34 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__divsi3)
+        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+        muls    r2, r0, r2   // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+#else
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0
+        mov     r0, r1
+        mov     r1, r3
+#endif
         bl      SYMBOL_NAME(__divmodsi4)
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
+#endif // __ARM_ARCH_ISA_THUMB == 1
 END_COMPILERRT_FUNCTION(__aeabi_idivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_ldivmod.S b/lib/libcompiler_rt/arm/aeabi_ldivmod.S
index 9f161f3007f..038ae5d723a 100644
--- a/lib/libcompiler_rt/arm/aeabi_ldivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_ldivmod.S
@@ -16,18 +16,30 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
-        push    {r11, lr}
+        push    {r6, lr}
         sub     sp, sp, #16
-        add     r12, sp, #8
-        str     r12, [sp]
+        add     r6, sp, #8
+        str     r6, [sp]
+#if defined(__MINGW32__)
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
         bl      SYMBOL_NAME(__divmoddi4)
         ldr     r2, [sp, #8]
         ldr     r3, [sp, #12]
         add     sp, sp, #16
-        pop     {r11, pc}
+        pop     {r6, pc}
 END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_memset.S b/lib/libcompiler_rt/arm/aeabi_memset.S
index 48edd89705b..633f592279b 100644
--- a/lib/libcompiler_rt/arm/aeabi_memset.S
+++ b/lib/libcompiler_rt/arm/aeabi_memset.S
@@ -26,7 +26,7 @@ DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
 
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
         mov     r2, r1
-        mov     r1, #0
+        movs    r1, #0
         b       memset
 END_COMPILERRT_FUNCTION(__aeabi_memclr)
 
diff --git a/lib/libcompiler_rt/arm/aeabi_uidivmod.S b/lib/libcompiler_rt/arm/aeabi_uidivmod.S
index e1e12d97aa0..a627fc740a0 100644
--- a/lib/libcompiler_rt/arm/aeabi_uidivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_uidivmod.S
@@ -16,16 +16,40 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger)
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__aeabi_uidiv)
+        pop     {r1, r2, r3}
+        muls    r2, r0, r2 // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+LOCAL_LABEL(case_denom_larger):
+        movs    r1, r0
+        movs    r0, #0
+        JMP     (lr)
+#else
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
+#if defined(__MINGW32__)
+        mov     r3, r0
+        mov     r0, r1
+        mov     r1, r3
+#endif
         bl      SYMBOL_NAME(__udivmodsi4)
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
+#endif
 END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/aeabi_uldivmod.S b/lib/libcompiler_rt/arm/aeabi_uldivmod.S
index e8aaef282e9..be343b6bc82 100644
--- a/lib/libcompiler_rt/arm/aeabi_uldivmod.S
+++ b/lib/libcompiler_rt/arm/aeabi_uldivmod.S
@@ -16,18 +16,30 @@
 //   return {quot, rem};
 // }
 
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
         .syntax unified
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
-        push	{r11, lr}
+        push	{r6, lr}
         sub	sp, sp, #16
-        add	r12, sp, #8
-        str	r12, [sp]
+        add	r6, sp, #8
+        str	r6, [sp]
+#if defined(__MINGW32__)
+        movs    r6, r0
+        movs    r0, r2
+        movs    r2, r6
+        movs    r6, r1
+        movs    r1, r3
+        movs    r3, r6
+#endif
         bl	SYMBOL_NAME(__udivmoddi4)
         ldr	r2, [sp, #8]
         ldr	r3, [sp, #12]
         add	sp, sp, #16
-        pop	{r11, pc}
+        pop	{r6, pc}
 END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/comparesf2.S b/lib/libcompiler_rt/arm/comparesf2.S
index 52597b673f9..ef7091bf3c8 100644
--- a/lib/libcompiler_rt/arm/comparesf2.S
+++ b/lib/libcompiler_rt/arm/comparesf2.S
@@ -39,32 +39,64 @@
 
 #include "../assembly.h"
 .syntax unified
+#if __ARM_ARCH_ISA_THUMB == 2
+.thumb
+#endif
 
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
     // Make copies of a and b with the sign bit shifted off the top.  These will
     // be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
+#endif
 
     // We do the comparison in three stages (ignoring NaN values for the time
     // being).  First, we orr the absolute values of a and b; this sets the Z
     // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
     // effect this at all, but it *does* make sure that the C flag is clear for
     // the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+    lsrs    r6,     r3, #1
+    orrs    r6,     r2
+#else
     orrs    r12,    r2, r3, lsr #1
-
+#endif
     // Next, we check if a and b have the same or different signs.  If they have
     // opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+    beq     1f
+    movs    r6,     r0
+    eors    r6,     r1
+1:
+#else
     it ne
     eorsne  r12,    r0, r1
+#endif
 
     // If a and b are equal (either both zeros or bit identical; again, we're
     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
     // same sign, the flags are updated as they would be for a comparison of the
     // absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bmi     1f
+    subs    r0,     r2, r3
+1:
+#else
     it pl
     subspl  r0,     r2, r3
+#endif
 
     // If a is smaller in magnitude than b and both have the same sign, place
     // the negation of the sign of b in r0.  Thus, if both are negative and
@@ -76,41 +108,126 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2)
     // still clear from the shift argument in orrs; if a is positive and b
     // negative, this places 0 in r0; if a is negative and b positive, -1 is
     // placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bhs     1f
+    // Here if a and b have the same sign and absA < absB, the result is thus
+    // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
+    movs    r0,         #1
+    lsrs    r1,         #31
+    bne     LOCAL_LABEL(CHECK_NAN)
+    negs    r0,         r0
+    b       LOCAL_LABEL(CHECK_NAN)
+1:
+#else
     it lo
     mvnlo   r0,         r1, asr #31
+#endif
 
     // If a is greater in magnitude than b and both have the same sign, place
     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
     // in r0, which is the desired result.  Conversely, if both are positive
     // and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+    bls     1f
+    // Here both have the same sign and absA > absB.
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN)
+    negs    r0, r0
+1:
+#else
     it hi
     movhi   r0,         r1, asr #31
+#endif
 
     // If you've been keeping track, at this point r0 contains -1 if a < b and
     // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
     // If a == b, then the Z flag is set, so we can get the correct final value
     // into r0 by simply or'ing with 1 if Z is clear.
+    // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
     it ne
     orrne   r0,     r0, #1
+#endif
 
     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
     // the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     1f
+    cmp     r3,         r6
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+    pop     {r6, pc}
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__eqsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
 
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     // Identical to the preceding except in that we return -1 for NaN values.
-    // Given that the two paths share so much code, one might be tempted to 
+    // Given that the two paths share so much code, one might be tempted to
     // unify them; however, the extra code needed to do so makes the code size
     // to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov r0, s0
+    vmov r1, s1
+#endif
+#if __ARM_ARCH_ISA_THUMB == 1
+    push    {r6, lr}
+    lsls    r2,        r0, #1
+    lsls    r3,        r1, #1
+    lsrs    r6,        r3, #1
+    orrs    r6,        r2
+    beq     1f
+    movs    r6,        r0
+    eors    r6,        r1
+1:
+    bmi     2f
+    subs    r0,        r2, r3
+2:
+    bhs     3f
+    movs    r0,        #1
+    lsrs    r1,        #31
+    bne     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+    b       LOCAL_LABEL(CHECK_NAN_2)
+3:
+    bls     4f
+    movs    r0,         #1
+    lsrs    r1,         #31
+    beq     LOCAL_LABEL(CHECK_NAN_2)
+    negs    r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+    movs    r6,         #0xff
+    lsls    r6,         #24
+    cmp     r2,         r6
+    bhi     5f
+    cmp     r3,         r6
+5:
+    bls     6f
+    movs    r0,         #1
+    negs    r0,         r0
+6:
+    pop     {r6, pc}
+#else
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
     orrs    r12,    r2, r3, lsr #1
@@ -129,23 +246,51 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     cmpls   r3,         #0xff000000
     movhi   r0,         #-1
     JMP(lr)
+#endif
 END_COMPILERRT_FUNCTION(__gtsf2)
+
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
 
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+    .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+    vmov    r0,         s0
+    vmov    r1,         s1
+#endif
     // Return 1 for NaN values, 0 otherwise.
-    mov     r2,         r0, lsl #1
-    mov     r3,         r1, lsl #1
-    mov     r0,         #0
+    lsls    r2,         r0, #1
+    lsls    r3,         r1, #1
+    movs    r0,         #0
+#if __ARM_ARCH_ISA_THUMB == 1
+    movs    r1,         #0xff
+    lsls    r1,         #24
+    cmp     r2,         r1
+    bhi     1f
+    cmp     r3,         r1
+1:
+    bls     2f
+    movs    r0,         #1
+2:
+#else
     cmp     r2,         #0xff000000
     ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
+#endif
     JMP(lr)
 END_COMPILERRT_FUNCTION(__unordsf2)
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+	vmov s0, r0
+	vmov s1, r1
+	b SYMBOL_NAME(__unordsf2)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+#else
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
diff --git a/lib/libcompiler_rt/arm/divdf3vfp.S b/lib/libcompiler_rt/arm/divdf3vfp.S
index 928f53809f1..776ba4f24b4 100644
--- a/lib/libcompiler_rt/arm/divdf3vfp.S
+++ b/lib/libcompiler_rt/arm/divdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f64 d0, d0, d1
+#else
 	vmov	d6, r0, r1		// move first param from r0/r1 pair into d6
 	vmov	d7, r2, r3		// move second param from r2/r3 pair into d7
-	vdiv.f64 d5, d6, d7		
+	vdiv.f64 d5, d6, d7
 	vmov	r0, r1, d5		// move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divdf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/divsf3vfp.S b/lib/libcompiler_rt/arm/divsf3vfp.S
index a2e297f7015..130318f0c37 100644
--- a/lib/libcompiler_rt/arm/divsf3vfp.S
+++ b/lib/libcompiler_rt/arm/divsf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vdiv.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vdiv.f32 s13, s14, s15
 	vmov	r0, s13		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__divsf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/divsi3.S b/lib/libcompiler_rt/arm/divsi3.S
index 7e23ba4fc23..f066f60ad96 100644
--- a/lib/libcompiler_rt/arm/divsi3.S
+++ b/lib/libcompiler_rt/arm/divsi3.S
@@ -49,17 +49,37 @@ LOCAL_LABEL(divzero):
 #else
 ESTABLISH_FRAME
 //  Set aside the sign of the quotient.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    movs    r4,     r0
+    eors    r4,     r1
+#  else
     eor     r4,     r0, r1
+#  endif
 //  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+#  if   __ARM_ARCH_ISA_THUMB == 1
+    asrs    r2,     r0, #31
+    asrs    r3,     r1, #31
+    eors    r0,     r2
+    eors    r1,     r3
+    subs    r0,     r0, r2
+    subs    r1,     r1, r3
+#  else
     eor     r2,     r0, r0, asr #31
     eor     r3,     r1, r1, asr #31
     sub     r0,     r2, r0, asr #31
     sub     r1,     r3, r1, asr #31
+#  endif
 //  abs(a) / abs(b)
     bl      SYMBOL_NAME(__udivsi3)
 //  Apply sign of quotient to result and return.
+#  if __ARM_ARCH_ISA_THUMB == 1
+    asrs    r4,     #31
+    eors    r0,     r4
+    subs    r0,     r0, r4
+#  else
     eor     r0,     r0, r4, asr #31
     sub     r0,     r0, r4, asr #31
+#  endif
     CLEAR_FRAME_AND_RETURN
 #endif
 END_COMPILERRT_FUNCTION(__divsi3)
diff --git a/lib/libcompiler_rt/arm/eqdf2vfp.S b/lib/libcompiler_rt/arm/eqdf2vfp.S
index 95e6bb36334..d5070657091 100644
--- a/lib/libcompiler_rt/arm/eqdf2vfp.S
+++ b/lib/libcompiler_rt/arm/eqdf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov	d6, r0, r1	// load r0/r1 pair in double register
 	vmov	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(eq)
 	moveq	r0, #1		// set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/eqsf2vfp.S b/lib/libcompiler_rt/arm/eqsf2vfp.S
index fbac139c193..fd72b2fdbde 100644
--- a/lib/libcompiler_rt/arm/eqsf2vfp.S
+++ b/lib/libcompiler_rt/arm/eqsf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(eq)
 	moveq	r0, #1      // set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/extendsfdf2vfp.S b/lib/libcompiler_rt/arm/extendsfdf2vfp.S
index 563bf92afc3..1079f977bae 100644
--- a/lib/libcompiler_rt/arm/extendsfdf2vfp.S
+++ b/lib/libcompiler_rt/arm/extendsfdf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f64.f32 d0, s0
+#else
 	vmov	s15, r0      // load float register from R0
 	vcvt.f64.f32 d7, s15 // convert single to double
 	vmov	r0, r1, d7   // return result in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
 
diff --git a/lib/libcompiler_rt/arm/fixdfsivfp.S b/lib/libcompiler_rt/arm/fixdfsivfp.S
index 8263ff942f8..5d7b0f85654 100644
--- a/lib/libcompiler_rt/arm/fixdfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixdfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixdfsivfp)
 
diff --git a/lib/libcompiler_rt/arm/fixsfsivfp.S b/lib/libcompiler_rt/arm/fixsfsivfp.S
index c7c3b811787..805a277afa3 100644
--- a/lib/libcompiler_rt/arm/fixsfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixsfsivfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.s32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixsfsivfp)
 
diff --git a/lib/libcompiler_rt/arm/fixunsdfsivfp.S b/lib/libcompiler_rt/arm/fixunsdfsivfp.S
index 9cc1e628699..4f1b2c8cefd 100644
--- a/lib/libcompiler_rt/arm/fixunsdfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixunsdfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f64 s0, d0
+	vmov r0, s0
+#else
 	vmov	d7, r0, r1    // load double register from R0/R1
 	vcvt.u32.f64 s15, d7  // convert double to 32-bit int into s15
 	vmov	r0, s15	      // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
 
diff --git a/lib/libcompiler_rt/arm/fixunssfsivfp.S b/lib/libcompiler_rt/arm/fixunssfsivfp.S
index 79d70822911..e5d77823687 100644
--- a/lib/libcompiler_rt/arm/fixunssfsivfp.S
+++ b/lib/libcompiler_rt/arm/fixunssfsivfp.S
@@ -20,9 +20,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.u32.f32 s0, s0
+	vmov r0, s0
+#else
 	vmov	s15, r0        // load float register from R0
 	vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
 	vmov	r0, s15	       // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__fixunssfsivfp)
 
diff --git a/lib/libcompiler_rt/arm/floatsidfvfp.S b/lib/libcompiler_rt/arm/floatsidfvfp.S
index 7623f26c6e6..3297ad44d8c 100644
--- a/lib/libcompiler_rt/arm/floatsidfvfp.S
+++ b/lib/libcompiler_rt/arm/floatsidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.s32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.s32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsidfvfp)
 
diff --git a/lib/libcompiler_rt/arm/floatsisfvfp.S b/lib/libcompiler_rt/arm/floatsisfvfp.S
index c73dfac13eb..65408b54b8d 100644
--- a/lib/libcompiler_rt/arm/floatsisfvfp.S
+++ b/lib/libcompiler_rt/arm/floatsisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.s32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatsisfvfp)
 
diff --git a/lib/libcompiler_rt/arm/floatunssidfvfp.S b/lib/libcompiler_rt/arm/floatunssidfvfp.S
index 2a59fdb830b..d7a7024a25b 100644
--- a/lib/libcompiler_rt/arm/floatunssidfvfp.S
+++ b/lib/libcompiler_rt/arm/floatunssidfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f64.u32 d0, s0
+#else
 	vmov	s15, r0        // move int to float register s15
 	vcvt.f64.u32 d7, s15   // convert 32-bit int in s15 to double in d7
 	vmov	r0, r1, d7     // move d7 to result register pair r0/r1
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssidfvfp)
 
diff --git a/lib/libcompiler_rt/arm/floatunssisfvfp.S b/lib/libcompiler_rt/arm/floatunssisfvfp.S
index c096263c1bc..1ca856519a9 100644
--- a/lib/libcompiler_rt/arm/floatunssisfvfp.S
+++ b/lib/libcompiler_rt/arm/floatunssisfvfp.S
@@ -19,9 +19,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmov s0, r0
+	vcvt.f32.u32 s0, s0
+#else
 	vmov	s15, r0	       // move int to float register s15
 	vcvt.f32.u32 s15, s15  // convert 32-bit int in s15 to float in s15
 	vmov	r0, s15        // move s15 to result register
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__floatunssisfvfp)
 
diff --git a/lib/libcompiler_rt/arm/gedf2vfp.S b/lib/libcompiler_rt/arm/gedf2vfp.S
index 72f13ef4e71..364fc5b24cd 100644
--- a/lib/libcompiler_rt/arm/gedf2vfp.S
+++ b/lib/libcompiler_rt/arm/gedf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ge)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/gesf2vfp.S b/lib/libcompiler_rt/arm/gesf2vfp.S
index c9ee52c9c44..346c3473ae4 100644
--- a/lib/libcompiler_rt/arm/gesf2vfp.S
+++ b/lib/libcompiler_rt/arm/gesf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ge)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/gtdf2vfp.S b/lib/libcompiler_rt/arm/gtdf2vfp.S
index c7f277552fa..3389c3ad973 100644
--- a/lib/libcompiler_rt/arm/gtdf2vfp.S
+++ b/lib/libcompiler_rt/arm/gtdf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(gt)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/gtsf2vfp.S b/lib/libcompiler_rt/arm/gtsf2vfp.S
index 7d49e4564a8..afdba8b018e 100644
--- a/lib/libcompiler_rt/arm/gtsf2vfp.S
+++ b/lib/libcompiler_rt/arm/gtsf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0		// move from GPR 0 to float register
 	vmov	s15, r1		// move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(gt)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/ledf2vfp.S b/lib/libcompiler_rt/arm/ledf2vfp.S
index ca5b553f115..4bbe4c86837 100644
--- a/lib/libcompiler_rt/arm/ledf2vfp.S
+++ b/lib/libcompiler_rt/arm/ledf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ls)
 	movls	r0, #1		// set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/lesf2vfp.S b/lib/libcompiler_rt/arm/lesf2vfp.S
index f25422ece8f..51232bd8ced 100644
--- a/lib/libcompiler_rt/arm/lesf2vfp.S
+++ b/lib/libcompiler_rt/arm/lesf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ls)
 	movls	r0, #1      // set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/ltdf2vfp.S b/lib/libcompiler_rt/arm/ltdf2vfp.S
index 6e2c0997c01..8e2928c813d 100644
--- a/lib/libcompiler_rt/arm/ltdf2vfp.S
+++ b/lib/libcompiler_rt/arm/ltdf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(mi)
 	movmi	r0, #1		// set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/ltsf2vfp.S b/lib/libcompiler_rt/arm/ltsf2vfp.S
index 95febb60672..59c00c6bab6 100644
--- a/lib/libcompiler_rt/arm/ltsf2vfp.S
+++ b/lib/libcompiler_rt/arm/ltsf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1     // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(mi)
 	movmi	r0, #1      // set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/muldf3vfp.S b/lib/libcompiler_rt/arm/muldf3vfp.S
index f638de1ad28..aa7b2349503 100644
--- a/lib/libcompiler_rt/arm/muldf3vfp.S
+++ b/lib/libcompiler_rt/arm/muldf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
-	vmul.f64 d6, d6, d7		
+	vmul.f64 d6, d6, d7
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__muldf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/mulsf3vfp.S b/lib/libcompiler_rt/arm/mulsf3vfp.S
index bef58d3a0c8..a1da789dcad 100644
--- a/lib/libcompiler_rt/arm/mulsf3vfp.S
+++ b/lib/libcompiler_rt/arm/mulsf3vfp.S
@@ -18,9 +18,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vmul.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vmul.f32 s13, s14, s15
+#endif
 	vmov	r0, s13		// move result back to r0
 	bx	lr
 END_COMPILERRT_FUNCTION(__mulsf3vfp)
diff --git a/lib/libcompiler_rt/arm/nedf2vfp.S b/lib/libcompiler_rt/arm/nedf2vfp.S
index 78cf529d665..aef72eb0097 100644
--- a/lib/libcompiler_rt/arm/nedf2vfp.S
+++ b/lib/libcompiler_rt/arm/nedf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
 	vcmp.f64 d6, d7		
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ne)
 	movne	r0, #1		// set result register to 0 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/negdf2vfp.S b/lib/libcompiler_rt/arm/negdf2vfp.S
index 01c8ba6a120..81f0ab8eec1 100644
--- a/lib/libcompiler_rt/arm/negdf2vfp.S
+++ b/lib/libcompiler_rt/arm/negdf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f64 d0, d0
+#else
 	eor	r1, r1, #-2147483648	// flip sign bit on double in r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negdf2vfp)
 
diff --git a/lib/libcompiler_rt/arm/negsf2vfp.S b/lib/libcompiler_rt/arm/negsf2vfp.S
index 797abb32ead..46ab4a9cf16 100644
--- a/lib/libcompiler_rt/arm/negsf2vfp.S
+++ b/lib/libcompiler_rt/arm/negsf2vfp.S
@@ -18,7 +18,11 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vneg.f32 s0, s0
+#else
 	eor	r0, r0, #-2147483648	// flip sign bit on float in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__negsf2vfp)
 
diff --git a/lib/libcompiler_rt/arm/nesf2vfp.S b/lib/libcompiler_rt/arm/nesf2vfp.S
index 554d3e46751..50d60f49300 100644
--- a/lib/libcompiler_rt/arm/nesf2vfp.S
+++ b/lib/libcompiler_rt/arm/nesf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0	    // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ne)
 	movne	r0, #1      // set result register to 1 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/subdf3vfp.S b/lib/libcompiler_rt/arm/subdf3vfp.S
index 1fc7d18c3d3..2b6f2bdbfdd 100644
--- a/lib/libcompiler_rt/arm/subdf3vfp.S
+++ b/lib/libcompiler_rt/arm/subdf3vfp.S
@@ -18,10 +18,14 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f64 d0, d0, d1
+#else
 	vmov 	d6, r0, r1         // move first param from r0/r1 pair into d6
 	vmov 	d7, r2, r3         // move second param from r2/r3 pair into d7
 	vsub.f64 d6, d6, d7		
 	vmov 	r0, r1, d6         // move result back to r0/r1 pair
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subdf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/subsf3vfp.S b/lib/libcompiler_rt/arm/subsf3vfp.S
index 11fe386cd0d..3e83ea26507 100644
--- a/lib/libcompiler_rt/arm/subsf3vfp.S
+++ b/lib/libcompiler_rt/arm/subsf3vfp.S
@@ -12,17 +12,21 @@
 //
 // extern float __subsf3vfp(float a, float b);
 //
-// Returns the difference between two single precision floating point numbers 
+// Returns the difference between two single precision floating point numbers
 // using the Darwin calling convention where single arguments are passsed
 // like 32-bit ints.
 //
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vsub.f32 s0, s0, s1
+#else
 	vmov	s14, r0		// move first param from r0 into float register
 	vmov	s15, r1		// move second param from r1 into float register
 	vsub.f32 s14, s14, s15
 	vmov	r0, s14		// move result back to r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__subsf3vfp)
 
diff --git a/lib/libcompiler_rt/arm/truncdfsf2vfp.S b/lib/libcompiler_rt/arm/truncdfsf2vfp.S
index 04287ad27ce..682e54d3d29 100644
--- a/lib/libcompiler_rt/arm/truncdfsf2vfp.S
+++ b/lib/libcompiler_rt/arm/truncdfsf2vfp.S
@@ -19,9 +19,13 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcvt.f32.f64 s0, d0
+#else
 	vmov 	d7, r0, r1   // load double from r0/r1 pair
 	vcvt.f32.f64 s15, d7 // convert double to single (trucate precision)
 	vmov 	r0, s15      // return result in r0
+#endif
 	bx	lr
 END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
 
diff --git a/lib/libcompiler_rt/arm/udivsi3.S b/lib/libcompiler_rt/arm/udivsi3.S
index 085f8fb9e2d..b97b3080bff 100644
--- a/lib/libcompiler_rt/arm/udivsi3.S
+++ b/lib/libcompiler_rt/arm/udivsi3.S
@@ -37,15 +37,38 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
 	beq     LOCAL_LABEL(divby0)
 	udiv	r0, r0, r1
 	bx  	lr
-#else
+
+LOCAL_LABEL(divby0):
+	mov     r0, #0
+#  ifdef __ARM_EABI__
+	b       __aeabi_idiv0
+#  else
+	JMP(lr)
+#  endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
 	cmp	r1, #1
 	bcc	LOCAL_LABEL(divby0)
+#if __ARM_ARCH_ISA_THUMB == 1
+	bne LOCAL_LABEL(num_neq_denom)
+	JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
 	IT(eq)
 	JMPc(lr, eq)
+#endif
 	cmp	r0, r1
+#if __ARM_ARCH_ISA_THUMB == 1
+	bhs LOCAL_LABEL(num_ge_denom)
+	movs r0, #0
+	JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
 	ITT(cc)
 	movcc	r0, #0
 	JMPc(lr, cc)
+#endif
+
 	/*
 	 * Implement division using binary long division algorithm.
 	 *
@@ -62,7 +85,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
 	 * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
 	 */
 
-#  ifdef __ARM_FEATURE_CLZ
+#  if defined(__ARM_FEATURE_CLZ)
 	clz	ip, r0
 	clz	r3, r1
 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
@@ -77,49 +100,131 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
 	sub	ip, ip, r3, lsl #3
 	mov	r3, #0
 	bx	ip
-#  else
+#  else /* No CLZ Feature */
 #    if __ARM_ARCH_ISA_THUMB == 2
 #    error THUMB mode requires CLZ or UDIV
 #    endif
+#    if __ARM_ARCH_ISA_THUMB == 1
+#      define BLOCK_SIZE 10
+#    else
+#      define BLOCK_SIZE 12
+#    endif
+
 	mov	r2, r0
+#    if __ARM_ARCH_ISA_THUMB == 1
+	mov ip, r0
+	adr r0, LOCAL_LABEL(div0block)
+	adds r0, #1
+#    else
 	adr	ip, LOCAL_LABEL(div0block)
-
-	lsr	r3, r2, #16
+#    endif
+	lsrs	r3, r2, #16
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_16)
+	movs r2, r3
+	subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(16 * 12)
+	subhs	ip, ip, #(16 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #8
+	lsrs	r3, r2, #8
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_8)
+	movs r2, r3
+	subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(8 * 12)
+	subhs	ip, ip, #(8 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #4
+	lsrs	r3, r2, #4
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_4)
+	movs r2, r3
+	subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+#    else
 	movhs	r2, r3
-	subhs	ip, #(4 * 12)
+	subhs	ip, #(4 * BLOCK_SIZE)
+#    endif
 
-	lsr	r3, r2, #2
+	lsrs	r3, r2, #2
 	cmp	r3, r1
+#    if __ARM_ARCH_ISA_THUMB == 1
+	blo LOCAL_LABEL(skip_2)
+	movs r2, r3
+	subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+#    else
 	movhs	r2, r3
-	subhs	ip, ip, #(2 * 12)
+	subhs	ip, ip, #(2 * BLOCK_SIZE)
+#    endif
 
 	/* Last block, no need to update r2 or r3. */
+#    if __ARM_ARCH_ISA_THUMB == 1
+	lsrs r3, r2, #1
+	cmp r3, r1
+	blo LOCAL_LABEL(skip_1)
+	subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+	movs r2, r0
+	mov r0, ip
+	movs r3, #0
+	JMP (r2)
+
+#    else
 	cmp	r1, r2, lsr #1
-	subls	ip, ip, #(1 * 12)
+	subls	ip, ip, #(1 * BLOCK_SIZE)
 
-	mov	r3, #0
+	movs	r3, #0
 
 	JMP(ip)
-#  endif
+#    endif
+#  endif /* __ARM_FEATURE_CLZ */
+
 
 #define	IMM	#
+	/* due to the range limit of branch in Thumb1, we have to place the
+		 block closer */
+LOCAL_LABEL(divby0):
+	movs	r0, #0
+#      if defined(__ARM_EABI__)
+	push {r7, lr}
+	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
+	pop  {r7, pc}
+#      else
+	JMP(lr)
+#      endif
 
+
+#if __ARM_ARCH_ISA_THUMB == 1
+#define block(shift)                                                           \
+	lsls r2, r1, IMM shift;                                                      \
+	cmp r0, r2;                                                                  \
+	blo LOCAL_LABEL(block_skip_##shift);                                         \
+	subs r0, r0, r2;                                                             \
+	LOCAL_LABEL(block_skip_##shift) :;                                           \
+	adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+	/* TODO: if current location counter is not not word aligned, we don't
+		 need the .p2align and nop */
+	/* Label div0block must be word-aligned. First align block 31 */
+	.p2align 2
+	nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
 #define block(shift)                                                           \
 	cmp	r0, r1, lsl IMM shift;                                         \
 	ITT(hs);                                                               \
 	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
 	WIDE(subhs)	r0, r0, r1, lsl IMM shift
+#endif
 
 	block(31)
 	block(30)
@@ -159,14 +264,6 @@ LOCAL_LABEL(div0block):
 	JMP(lr)
 #endif /* __ARM_ARCH_EXT_IDIV__ */
 
-LOCAL_LABEL(divby0):
-	mov	r0, #0
-#ifdef __ARM_EABI__
-	b	__aeabi_idiv0
-#else
-	JMP(lr)
-#endif
-
 END_COMPILERRT_FUNCTION(__udivsi3)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/libcompiler_rt/arm/unorddf2vfp.S b/lib/libcompiler_rt/arm/unorddf2vfp.S
index 022dd7a978a..6625fa8a311 100644
--- a/lib/libcompiler_rt/arm/unorddf2vfp.S
+++ b/lib/libcompiler_rt/arm/unorddf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f64 d0, d1
+#else
 	vmov 	d6, r0, r1	// load r0/r1 pair in double register
 	vmov 	d7, r2, r3	// load r2/r3 pair in double register
-	vcmp.f64 d6, d7		
+	vcmp.f64 d6, d7
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(vs)
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm/unordsf2vfp.S b/lib/libcompiler_rt/arm/unordsf2vfp.S
index 5ebdd3df550..0b5da2ba3e1 100644
--- a/lib/libcompiler_rt/arm/unordsf2vfp.S
+++ b/lib/libcompiler_rt/arm/unordsf2vfp.S
@@ -19,10 +19,15 @@
 	.syntax unified
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+	vcmp.f32 s0, s1
+#else
 	vmov	s14, r0     // move from GPR 0 to float register
 	vmov	s15, r1	    // move from GPR 1 to float register
 	vcmp.f32 s14, s15
+#endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(vs)
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/libcompiler_rt/arm64/Makefile.mk b/lib/libcompiler_rt/arm64/Makefile.mk
deleted file mode 100644
index 7f7e3866130..00000000000
--- a/lib/libcompiler_rt/arm64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := arm64 
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/armv6m/Makefile.mk b/lib/libcompiler_rt/armv6m/Makefile.mk
deleted file mode 100644
index f3c1807f01b..00000000000
--- a/lib/libcompiler_rt/armv6m/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := armv6m
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/ashldi3.c b/lib/libcompiler_rt/ashldi3.c
index eb4698ac517..fcb0abdb1fc 100644
--- a/lib/libcompiler_rt/ashldi3.c
+++ b/lib/libcompiler_rt/ashldi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(llsl, ashldi3)
-
 COMPILER_RT_ABI di_int
 __ashldi3(di_int a, si_int b)
 {
@@ -41,3 +39,10 @@ __ashldi3(di_int a, si_int b)
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) {
+  return __ashldi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ashrdi3.c b/lib/libcompiler_rt/ashrdi3.c
index 14c878bb779..b4ab4c617ba 100644
--- a/lib/libcompiler_rt/ashrdi3.c
+++ b/lib/libcompiler_rt/ashrdi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(lasr, ashrdi3)
-
 COMPILER_RT_ABI di_int
 __ashrdi3(di_int a, si_int b)
 {
@@ -42,3 +40,10 @@ __ashrdi3(di_int a, si_int b)
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) {
+  return __ashrdi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/assembly.h b/lib/libcompiler_rt/assembly.h
index 5fc74f68f60..b15da52345c 100644
--- a/lib/libcompiler_rt/assembly.h
+++ b/lib/libcompiler_rt/assembly.h
@@ -44,7 +44,8 @@
 #endif
 #define CONST_SECTION .section .rodata
 
-#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+    defined(__linux__)
 #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
 #else
 #define NO_EXEC_STACK_DIRECTIVE
@@ -70,7 +71,7 @@
 #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
 #define ARM_HAS_BX
 #endif
-#if !defined(__ARM_FEATURE_CLZ) &&                                             \
+#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 &&                \
     (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
 #define __ARM_FEATURE_CLZ
 #endif
@@ -95,9 +96,11 @@
 #if __ARM_ARCH_ISA_THUMB == 2
 #define IT(cond)  it cond
 #define ITT(cond) itt cond
+#define ITE(cond) ite cond
 #else
 #define IT(cond)
 #define ITT(cond)
+#define ITE(cond)
 #endif
 
 #if __ARM_ARCH_ISA_THUMB == 2
@@ -149,6 +152,7 @@
 #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR                       \
   .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
 
 #if defined(__ARM_EABI__)
diff --git a/lib/libcompiler_rt/atomic.c b/lib/libcompiler_rt/atomic.c
index f1ddc3e0c52..ee35e342eda 100644
--- a/lib/libcompiler_rt/atomic.c
+++ b/lib/libcompiler_rt/atomic.c
@@ -229,13 +229,20 @@ void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
 // Where the size is known at compile time, the compiler may emit calls to
 // specialised versions of the above functions.
 ////////////////////////////////////////////////////////////////////////////////
+#ifdef __SIZEOF_INT128__
 #define OPTIMISED_CASES\
   OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
   OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
   OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
   OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
-  /* FIXME: __uint128_t isn't available on 32 bit platforms.
-  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define OPTIMISED_CASES\
+  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
 
 #define OPTIMISED_CASE(n, lockfree, type)\
 type __atomic_load_##n(type *src, int model) {\
diff --git a/lib/libcompiler_rt/bswapdi2.c b/lib/libcompiler_rt/bswapdi2.c
new file mode 100644
index 00000000000..eb220007bb1
--- /dev/null
+++ b/lib/libcompiler_rt/bswapdi2.c
@@ -0,0 +1,27 @@
+/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+  return (
+      (((u)&0xff00000000000000ULL) >> 56) |
+      (((u)&0x00ff000000000000ULL) >> 40) |
+      (((u)&0x0000ff0000000000ULL) >> 24) |
+      (((u)&0x000000ff00000000ULL) >> 8)  |
+      (((u)&0x00000000ff000000ULL) << 8)  |
+      (((u)&0x0000000000ff0000ULL) << 24) |
+      (((u)&0x000000000000ff00ULL) << 40) |
+      (((u)&0x00000000000000ffULL) << 56));
+}
diff --git a/lib/libcompiler_rt/bswapsi2.c b/lib/libcompiler_rt/bswapsi2.c
new file mode 100644
index 00000000000..5d941e69f7c
--- /dev/null
+++ b/lib/libcompiler_rt/bswapsi2.c
@@ -0,0 +1,23 @@
+/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+  return (
+      (((u)&0xff000000) >> 24) |
+      (((u)&0x00ff0000) >> 8)  |
+      (((u)&0x0000ff00) << 8)  |
+      (((u)&0x000000ff) << 24));
+}
diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c
index 55bbdd37589..25570fc2157 100644
--- a/lib/libcompiler_rt/clear_cache.c
+++ b/lib/libcompiler_rt/clear_cache.c
@@ -82,10 +82,6 @@ uintptr_t GetCurrentProcess(void);
   #endif
 #endif
 
-#if defined(__linux__) && defined(__arm__)
-  #include <asm/unistd.h>
-#endif
-
 /*
  * The compiler generates calls to __clear_cache() when creating 
  * trampoline functions on the stack for use with nested functions.
@@ -94,7 +90,7 @@ uintptr_t GetCurrentProcess(void);
  */
 
 void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
 /*
  * Intel processors have a unified instruction and data cache
  * so there is nothing to do
@@ -108,12 +104,23 @@ void __clear_cache(void *start, void *end) {
 
         sysarch(ARM_SYNC_ICACHE, &arg);
     #elif defined(__linux__)
+    /*
+     * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+     * it also brought many other unused defines, as well as a dependency on
+     * kernel headers to be installed.
+     *
+     * This value is stable at least since Linux 3.13 and should remain so for
+     * compatibility reasons, warranting it's re-definition here.
+     */
+    #define __ARM_NR_cacheflush 0x0f0002
          register int start_reg __asm("r0") = (int) (intptr_t) start;
          const register int end_reg __asm("r1") = (int) (intptr_t) end;
+         const register int flags __asm("r2") = 0;
          const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
          __asm __volatile("svc 0x0"
                           : "=r"(start_reg)
-                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg));
+                          : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
+                            "r"(flags));
          if (start_reg != 0) {
              compilerrt_abort();
          }
diff --git a/lib/libcompiler_rt/comparedf2.c b/lib/libcompiler_rt/comparedf2.c
index 9e29752231e..c5bb169d002 100644
--- a/lib/libcompiler_rt/comparedf2.c
+++ b/lib/libcompiler_rt/comparedf2.c
@@ -113,8 +113,6 @@ __gedf2(fp_t a, fp_t b) {
     }
 }
 
-ARM_EABI_FNALIAS(dcmpun, unorddf2)
-
 COMPILER_RT_ABI int
 __unorddf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
@@ -144,3 +142,9 @@ __gtdf2(fp_t a, fp_t b) {
     return __gedf2(a, b);
 }
 
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) {
+  return __unorddf2(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/comparesf2.c b/lib/libcompiler_rt/comparesf2.c
index 1fd50636aba..4badb5e1b9f 100644
--- a/lib/libcompiler_rt/comparesf2.c
+++ b/lib/libcompiler_rt/comparesf2.c
@@ -113,8 +113,6 @@ __gesf2(fp_t a, fp_t b) {
     }
 }
 
-ARM_EABI_FNALIAS(fcmpun, unordsf2)
-
 COMPILER_RT_ABI int
 __unordsf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
@@ -143,3 +141,10 @@ COMPILER_RT_ABI enum GE_RESULT
 __gtsf2(fp_t a, fp_t b) {
     return __gesf2(a, b);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) {
+  return __unordsf2(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c
index 9a3737020a4..83ea7a49faf 100644
--- a/lib/libcompiler_rt/cpu_model.c
+++ b/lib/libcompiler_rt/cpu_model.c
@@ -27,6 +27,10 @@
 #include <intrin.h>
 #endif
 
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
 enum VendorSignatures {
   SIG_INTEL = 0x756e6547 /* Genu */,
   SIG_AMD = 0x68747541 /* Auth */
@@ -40,29 +44,16 @@ enum ProcessorVendors {
 };
 
 enum ProcessorTypes {
-  INTEL_ATOM = 1,
+  INTEL_BONNELL = 1,
   INTEL_CORE2,
   INTEL_COREI7,
   AMDFAM10H,
   AMDFAM15H,
-  INTEL_i386,
-  INTEL_i486,
-  INTEL_PENTIUM,
-  INTEL_PENTIUM_PRO,
-  INTEL_PENTIUM_II,
-  INTEL_PENTIUM_III,
-  INTEL_PENTIUM_IV,
-  INTEL_PENTIUM_M,
-  INTEL_CORE_DUO,
-  INTEL_XEONPHI,
-  INTEL_X86_64,
-  INTEL_NOCONA,
-  INTEL_PRESCOTT,
-  AMD_i486,
-  AMDPENTIUM,
-  AMDATHLON,
-  AMDFAM14H,
-  AMDFAM16H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,
+  AMDFAM17H,
   CPU_TYPE_MAX
 };
 
@@ -75,32 +66,14 @@ enum ProcessorSubtypes {
   AMDFAM10H_ISTANBUL,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
-  INTEL_PENTIUM_MMX,
-  INTEL_CORE2_65,
-  INTEL_CORE2_45,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
   INTEL_COREI7_IVYBRIDGE,
   INTEL_COREI7_HASWELL,
   INTEL_COREI7_BROADWELL,
   INTEL_COREI7_SKYLAKE,
   INTEL_COREI7_SKYLAKE_AVX512,
-  INTEL_ATOM_BONNELL,
-  INTEL_ATOM_SILVERMONT,
-  INTEL_KNIGHTS_LANDING,
-  AMDPENTIUM_K6,
-  AMDPENTIUM_K62,
-  AMDPENTIUM_K63,
-  AMDPENTIUM_GEODE,
-  AMDATHLON_TBIRD,
-  AMDATHLON_MP,
-  AMDATHLON_XP,
-  AMDATHLON_K8SSE3,
-  AMDATHLON_OPTERON,
-  AMDATHLON_FX,
-  AMDATHLON_64,
-  AMD_BTVER1,
-  AMD_BTVER2,
-  AMDFAM15H_BDVER3,
-  AMDFAM15H_BDVER4,
   CPU_SUBTYPE_MAX
 };
 
@@ -116,11 +89,26 @@ enum ProcessorFeatures {
   FEATURE_SSE4_2,
   FEATURE_AVX,
   FEATURE_AVX2,
-  FEATURE_AVX512,
-  FEATURE_AVX512SAVE,
-  FEATURE_MOVBE,
-  FEATURE_ADX,
-  FEATURE_EM64T
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA,
+  FEATURE_AVX5124VNNIW,
+  FEATURE_AVX5124FMAPS,
+  FEATURE_AVX512VPOPCNTDQ
 };
 
 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -160,26 +148,27 @@ static bool isCpuIdSupported() {
 
 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
 /// the specified arguments.  If we can't run cpuid on the host, return true.
-static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                                unsigned *rECX, unsigned *rEDX) {
 #if defined(__GNUC__) || defined(__clang__)
 #if defined(__x86_64__)
-  // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
   __asm__("movq\t%%rbx, %%rsi\n\t"
           "cpuid\n\t"
           "xchgq\t%%rbx, %%rsi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value));
+  return false;
 #elif defined(__i386__)
   __asm__("movl\t%%ebx, %%esi\n\t"
           "cpuid\n\t"
           "xchgl\t%%ebx, %%esi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value));
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
+  return false;
 #else
-  assert(0 && "This method is defined only for x86.");
+  return true;
 #endif
 #elif defined(_MSC_VER)
   // The MSVC intrinsic is portable across x86 and x64.
@@ -189,19 +178,20 @@ static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
   *rEBX = registers[1];
   *rECX = registers[2];
   *rEDX = registers[3];
+  return false;
 #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+  return true;
 #endif
 }
 
 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
 /// the 4 values in the specified arguments.  If we can't run cpuid on the host,
 /// return true.
-static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                  unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                  unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_X64)
 #if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
   // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
   // FIXME: should we save this for Clang?
   __asm__("movq\t%%rbx, %%rsi\n\t"
@@ -209,42 +199,27 @@ static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
           "xchgq\t%%rbx, %%rsi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
-  int registers[4];
-  __cpuidex(registers, value, subleaf);
-  *rEAX = registers[0];
-  *rEBX = registers[1];
-  *rECX = registers[2];
-  *rEDX = registers[3];
-#else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
-#endif
-#elif defined(__i386__) || defined(_M_IX86)
-#if defined(__GNUC__) || defined(__clang__)
+  return false;
+#elif defined(__i386__)
   __asm__("movl\t%%ebx, %%esi\n\t"
           "cpuid\n\t"
           "xchgl\t%%ebx, %%esi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
-  __asm {
-      mov   eax,value
-      mov   ecx,subleaf
-      cpuid
-      mov   esi,rEAX
-      mov   dword ptr [esi],eax
-      mov   esi,rEBX
-      mov   dword ptr [esi],ebx
-      mov   esi,rECX
-      mov   dword ptr [esi],ecx
-      mov   esi,rEDX
-      mov   dword ptr [esi],edx
-  }
+  return false;
 #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+  return true;
 #endif
+#elif defined(_MSC_VER)
+  int registers[4];
+  __cpuidex(registers, value, subleaf);
+  *rEAX = registers[0];
+  *rEBX = registers[1];
+  *rECX = registers[2];
+  *rEDX = registers[3];
+  return false;
 #else
-  assert(0 && "This method is defined only for x86.");
+  return true;
 #endif
 }
 
@@ -279,84 +254,15 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
   }
 }
 
-static void getIntelProcessorTypeAndSubtype(unsigned int Family,
-                                            unsigned int Model,
-                                            unsigned int Brand_id,
-                                            unsigned int Features,
-                                            unsigned *Type, unsigned *Subtype) {
+static void
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                unsigned Brand_id, unsigned Features,
+                                unsigned *Type, unsigned *Subtype) {
   if (Brand_id != 0)
     return;
   switch (Family) {
-  case 3:
-    *Type = INTEL_i386;
-    break;
-  case 4:
-    switch (Model) {
-    case 0: // Intel486 DX processors
-    case 1: // Intel486 DX processors
-    case 2: // Intel486 SX processors
-    case 3: // Intel487 processors, IntelDX2 OverDrive processors,
-            // IntelDX2 processors
-    case 4: // Intel486 SL processor
-    case 5: // IntelSX2 processors
-    case 7: // Write-Back Enhanced IntelDX2 processors
-    case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
-    default:
-      *Type = INTEL_i486;
-      break;
-    }
-  case 5:
-    switch (Model) {
-    case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
-            // Pentium processors (60, 66)
-    case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
-            // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
-            // 150, 166, 200)
-    case 3: // Pentium OverDrive processors for Intel486 processor-based
-            // systems
-      *Type = INTEL_PENTIUM;
-      break;
-    case 4: // Pentium OverDrive processor with MMX technology for Pentium
-            // processor (75, 90, 100, 120, 133), Pentium processor with
-            // MMX technology (166, 200)
-      *Type = INTEL_PENTIUM;
-      *Subtype = INTEL_PENTIUM_MMX;
-      break;
-    default:
-      *Type = INTEL_PENTIUM;
-      break;
-    }
   case 6:
     switch (Model) {
-    case 0x01: // Pentium Pro processor
-      *Type = INTEL_PENTIUM_PRO;
-      break;
-    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
-               // model 03
-    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
-               // model 05, and Intel Celeron processor, model 05
-    case 0x06: // Celeron processor, model 06
-      *Type = INTEL_PENTIUM_II;
-      break;
-    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
-               // processor, model 07
-    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
-               // model 08, and Celeron processor, model 08
-    case 0x0a: // Pentium III Xeon processor, model 0Ah
-    case 0x0b: // Pentium III processor, model 0Bh
-      *Type = INTEL_PENTIUM_III;
-      break;
-    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
-    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
-               // 0Dh. All processors are manufactured using the 90 nm process.
-    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
-               // Integrated Processor with Intel QuickAssist Technology
-      *Type = INTEL_PENTIUM_M;
-      break;
-    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
-               // 0Eh. All processors are manufactured using the 65 nm process.
-      *Type = INTEL_CORE_DUO;
-      break;   // yonah
     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
                // mobile processor, Intel Core 2 Extreme processor, Intel
@@ -364,9 +270,6 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
                // 0Fh. All processors are manufactured using the 65 nm process.
     case 0x16: // Intel Celeron processor model 16h. All processors are
                // manufactured using the 65 nm process
-      *Type = INTEL_CORE2; // "core2"
-      *Subtype = INTEL_CORE2_65;
-      break;
     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
                // 17h. All processors are manufactured using the 45 nm process.
                //
@@ -374,14 +277,13 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
                // the 45 nm process.
       *Type = INTEL_CORE2; // "penryn"
-      *Subtype = INTEL_CORE2_45;
       break;
     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
                // processors are manufactured using the 45 nm process.
     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
                // As found in a Summer 2010 model iMac.
     case 0x1f:
-    case 0x2e:              // Nehalem EX
+    case 0x2e:             // Nehalem EX
       *Type = INTEL_COREI7; // "nehalem"
       *Subtype = INTEL_COREI7_NEHALEM;
       break;
@@ -399,7 +301,7 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
       *Subtype = INTEL_COREI7_SANDYBRIDGE;
       break;
     case 0x3a:
-    case 0x3e:              // Ivy Bridge EP
+    case 0x3e:             // Ivy Bridge EP
       *Type = INTEL_COREI7; // "ivybridge"
       *Subtype = INTEL_COREI7_IVYBRIDGE;
       break;
@@ -423,22 +325,26 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
       break;
 
     // Skylake:
-    case 0x4e:
-      *Type = INTEL_COREI7; // "skylake-avx512"
-      *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
-      break;
-    case 0x5e:
+    case 0x4e: // Skylake mobile
+    case 0x5e: // Skylake desktop
+    case 0x8e: // Kaby Lake mobile
+    case 0x9e: // Kaby Lake desktop
       *Type = INTEL_COREI7; // "skylake"
       *Subtype = INTEL_COREI7_SKYLAKE;
       break;
 
+    // Skylake Xeon:
+    case 0x55:
+      *Type = INTEL_COREI7;
+      *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      break;
+
     case 0x1c: // Most 45 nm Intel Atom processors
     case 0x26: // 45 nm Atom Lincroft
     case 0x27: // 32 nm Atom Medfield
     case 0x35: // 32 nm Atom Midview
     case 0x36: // 32 nm Atom Midview
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_BONNELL;
+      *Type = INTEL_BONNELL;
       break; // "bonnell"
 
     // Atom Silvermont codes from the Intel software optimization guide.
@@ -448,185 +354,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family,
     case 0x5a:
     case 0x5d:
     case 0x4c: // really airmont
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_SILVERMONT;
+      *Type = INTEL_SILVERMONT;
       break; // "silvermont"
 
     case 0x57:
-      *Type = INTEL_XEONPHI; // knl
-      *Subtype = INTEL_KNIGHTS_LANDING;
+      *Type = INTEL_KNL; // knl
       break;
 
-    default: // Unknown family 6 CPU, try to guess.
-      if (Features & (1 << FEATURE_AVX512)) {
-        *Type = INTEL_XEONPHI; // knl
-        *Subtype = INTEL_KNIGHTS_LANDING;
-        break;
-      }
-      if (Features & (1 << FEATURE_ADX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_BROADWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX2)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_HASWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_SANDYBRIDGE;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_2)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_SILVERMONT;
-        } else {
-          *Type = INTEL_COREI7;
-          *Subtype = INTEL_COREI7_NEHALEM;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_1)) {
-        *Type = INTEL_CORE2; // "penryn"
-        *Subtype = INTEL_CORE2_45;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSSE3)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
-        } else {
-          *Type = INTEL_CORE2; // "core2"
-          *Subtype = INTEL_CORE2_65;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_EM64T)) {
-        *Type = INTEL_X86_64;
-        break; // x86-64
-      }
-      if (Features & (1 << FEATURE_SSE2)) {
-        *Type = INTEL_PENTIUM_M;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE)) {
-        *Type = INTEL_PENTIUM_III;
-        break;
-      }
-      if (Features & (1 << FEATURE_MMX)) {
-        *Type = INTEL_PENTIUM_II;
-        break;
-      }
-      *Type = INTEL_PENTIUM_PRO;
-      break;
-    }
-  case 15: {
-    switch (Model) {
-    case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
-            // model 00h and manufactured using the 0.18 micron process.
-    case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
-            // processor MP, and Intel Celeron processor. All processors are
-            // model 01h and manufactured using the 0.18 micron process.
-    case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
-            // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
-            // processor, and Mobile Intel Celeron processor. All processors
-            // are model 02h and manufactured using the 0.13 micron process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
-      break;
-
-    case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
-            // processor. All processors are model 03h and manufactured using
-            // the 90 nm process.
-    case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
-            // Pentium D processor, Intel Xeon processor, Intel Xeon
-            // processor MP, Intel Celeron D processor. All processors are
-            // model 04h and manufactured using the 90 nm process.
-    case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
-            // Extreme Edition, Intel Xeon processor, Intel Xeon processor
-            // MP, Intel Celeron D processor. All processors are model 06h
-            // and manufactured using the 65 nm process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
-      break;
-
-    default:
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+    default: // Unknown family 6 CPU.
       break;
+    break;
     }
-  }
   default:
-    break; /*"generic"*/
+    break; // Unknown.
   }
 }
 
-static void getAMDProcessorTypeAndSubtype(unsigned int Family,
-                                          unsigned int Model,
-                                          unsigned int Features, unsigned *Type,
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                          unsigned Features, unsigned *Type,
                                           unsigned *Subtype) {
   // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
   // appears to be no way to generate the wide variety of AMD-specific targets
   // from the information returned from CPUID.
   switch (Family) {
-  case 4:
-    *Type = AMD_i486;
-  case 5:
-    *Type = AMDPENTIUM;
-    switch (Model) {
-    case 6:
-    case 7:
-      *Subtype = AMDPENTIUM_K6;
-      break; // "k6"
-    case 8:
-      *Subtype = AMDPENTIUM_K62;
-      break; // "k6-2"
-    case 9:
-    case 13:
-      *Subtype = AMDPENTIUM_K63;
-      break; // "k6-3"
-    case 10:
-      *Subtype = AMDPENTIUM_GEODE;
-      break; // "geode"
-    default:
-      break;
-    }
-  case 6:
-    *Type = AMDATHLON;
-    switch (Model) {
-    case 4:
-      *Subtype = AMDATHLON_TBIRD;
-      break; // "athlon-tbird"
-    case 6:
-    case 7:
-    case 8:
-      *Subtype = AMDATHLON_MP;
-      break; // "athlon-mp"
-    case 10:
-      *Subtype = AMDATHLON_XP;
-      break; // "athlon-xp"
-    default:
-      break;
-    }
-  case 15:
-    *Type = AMDATHLON;
-    if (Features & (1 << FEATURE_SSE3)) {
-      *Subtype = AMDATHLON_K8SSE3;
-      break; // "k8-sse3"
-    }
-    switch (Model) {
-    case 1:
-      *Subtype = AMDATHLON_OPTERON;
-      break; // "opteron"
-    case 5:
-      *Subtype = AMDATHLON_FX;
-      break; // "athlon-fx"; also opteron
-    default:
-      *Subtype = AMDATHLON_64;
-      break; // "athlon64"
-    }
   case 16:
     *Type = AMDFAM10H; // "amdfam10"
     switch (Model) {
@@ -639,23 +389,16 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
     case 8:
       *Subtype = AMDFAM10H_ISTANBUL;
       break;
-    default:
-      break;
     }
+    break;
   case 20:
-    *Type = AMDFAM14H;
-    *Subtype = AMD_BTVER1;
+    *Type = AMD_BTVER1;
     break; // "btver1";
   case 21:
     *Type = AMDFAM15H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1"
-    }
-    if (Model >= 0x50 && Model <= 0x6f) {
+    if (Model >= 0x60 && Model <= 0x7f) {
       *Subtype = AMDFAM15H_BDVER4;
-      break; // "bdver4"; 50h-6Fh: Excavator
+      break; // "bdver4"; 60h-7Fh: Excavator
     }
     if (Model >= 0x30 && Model <= 0x3f) {
       *Subtype = AMDFAM15H_BDVER3;
@@ -671,31 +414,47 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
     }
     break;
   case 22:
-    *Type = AMDFAM16H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1";
-    }
-    *Subtype = AMD_BTVER2;
+    *Type = AMD_BTVER2;
     break; // "btver2"
+  case 23:
+    *Type = AMDFAM17H;
+    *Subtype = AMDFAM17H_ZNVER1;
+    break;
   default:
     break; // "generic"
   }
 }
 
-static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
-                                     unsigned MaxLeaf) {
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+                                 unsigned *FeaturesOut) {
   unsigned Features = 0;
-  unsigned int EAX, EBX;
-  Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
-  Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
-  Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
-  Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
-  Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
-  Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
-  Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
-  Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+  unsigned EAX, EBX;
+
+  if ((EDX >> 15) & 1)
+    Features |= 1 << FEATURE_CMOV;
+  if ((EDX >> 23) & 1)
+    Features |= 1 << FEATURE_MMX;
+  if ((EDX >> 25) & 1)
+    Features |= 1 << FEATURE_SSE;
+  if ((EDX >> 26) & 1)
+    Features |= 1 << FEATURE_SSE2;
+
+  if ((ECX >> 0) & 1)
+    Features |= 1 << FEATURE_SSE3;
+  if ((ECX >> 1) & 1)
+    Features |= 1 << FEATURE_PCLMUL;
+  if ((ECX >> 9) & 1)
+    Features |= 1 << FEATURE_SSSE3;
+  if ((ECX >> 12) & 1)
+    Features |= 1 << FEATURE_FMA;
+  if ((ECX >> 19) & 1)
+    Features |= 1 << FEATURE_SSE4_1;
+  if ((ECX >> 20) & 1)
+    Features |= 1 << FEATURE_SSE4_2;
+  if ((ECX >> 23) & 1)
+    Features |= 1 << FEATURE_POPCNT;
+  if ((ECX >> 25) & 1)
+    Features |= 1 << FEATURE_AES;
 
   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   // indicates that the AVX registers will be saved and restored on context
@@ -704,30 +463,72 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                 ((EAX & 0x6) == 0x6);
   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
-  bool HasLeaf7 = MaxLeaf >= 0x7;
-  getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
-  bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
-  bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
-  bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
-  Features |= (HasAVX << FEATURE_AVX);
-  Features |= (HasAVX2 << FEATURE_AVX2);
-  Features |= (HasAVX512 << FEATURE_AVX512);
-  Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
-  Features |= (HasADX << FEATURE_ADX);
-
-  getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-  Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
-  return Features;
+
+  if (HasAVX)
+    Features |= 1 << FEATURE_AVX;
+
+  bool HasLeaf7 =
+      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+  if (HasLeaf7 && ((EBX >> 3) & 1))
+    Features |= 1 << FEATURE_BMI;
+  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+    Features |= 1 << FEATURE_AVX2;
+  if (HasLeaf7 && ((EBX >> 9) & 1))
+    Features |= 1 << FEATURE_BMI2;
+  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512F;
+  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512DQ;
+  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512IFMA;
+  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512PF;
+  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512ER;
+  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512CD;
+  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512BW;
+  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VL;
+
+  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VBMI;
+  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
+
+  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124VNNIW;
+  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124FMAPS;
+
+  unsigned MaxExtLevel;
+  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  if (HasExtLeaf1 && ((ECX >> 6) & 1))
+    Features |= 1 << FEATURE_SSE4_A;
+  if (HasExtLeaf1 && ((ECX >> 11) & 1))
+    Features |= 1 << FEATURE_XOP;
+  if (HasExtLeaf1 && ((ECX >> 16) & 1))
+    Features |= 1 << FEATURE_FMA4;
+
+  *FeaturesOut = Features;
 }
 
-#ifdef HAVE_INIT_PRIORITY
-#define CONSTRUCTOR_PRIORITY (101)
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
 #else
-#define CONSTRUCTOR_PRIORITY
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
 #endif
 
-int __cpu_indicator_init(void)
-    __attribute__((constructor CONSTRUCTOR_PRIORITY));
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
 
 struct __processor_model {
   unsigned int __cpu_vendor;
@@ -742,13 +543,13 @@ struct __processor_model {
    the priority set.  However, it still runs after ifunc initializers and
    needs to be called explicitly there.  */
 
-int __attribute__((constructor CONSTRUCTOR_PRIORITY))
+int CONSTRUCTOR_ATTRIBUTE
 __cpu_indicator_init(void) {
-  unsigned int EAX, EBX, ECX, EDX;
-  unsigned int MaxLeaf = 5;
-  unsigned int Vendor;
-  unsigned int Model, Family, Brand_id;
-  unsigned int Features = 0;
+  unsigned EAX, EBX, ECX, EDX;
+  unsigned MaxLeaf = 5;
+  unsigned Vendor;
+  unsigned Model, Family, Brand_id;
+  unsigned Features = 0;
 
   /* This function needs to run just once.  */
   if (__cpu_model.__cpu_vendor)
@@ -758,9 +559,7 @@ __cpu_indicator_init(void) {
     return -1;
 
   /* Assume cpuid insn present. Run in level 0 to get vendor id. */
-  getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
-
-  if (MaxLeaf < 1) {
+  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
     __cpu_model.__cpu_vendor = VENDOR_OTHER;
     return -1;
   }
@@ -769,7 +568,7 @@ __cpu_indicator_init(void) {
   Brand_id = EBX & 0xff;
 
   /* Find available features. */
-  Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
   __cpu_model.__cpu_features[0] = Features;
 
   if (Vendor == SIG_INTEL) {
diff --git a/lib/libcompiler_rt/divdf3.c b/lib/libcompiler_rt/divdf3.c
index ab44c2b25fe..492e32b851e 100644
--- a/lib/libcompiler_rt/divdf3.c
+++ b/lib/libcompiler_rt/divdf3.c
@@ -19,8 +19,6 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(ddiv, divdf3)
-
 COMPILER_RT_ABI fp_t
 __divdf3(fp_t a, fp_t b) {
     
@@ -183,3 +181,10 @@ __divdf3(fp_t a, fp_t b) {
         return result;
     }
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) {
+  return __divdf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divsf3.c b/lib/libcompiler_rt/divsf3.c
index de2e376125b..aa6289a6d70 100644
--- a/lib/libcompiler_rt/divsf3.c
+++ b/lib/libcompiler_rt/divsf3.c
@@ -19,8 +19,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fdiv, divsf3)
-
 COMPILER_RT_ABI fp_t
 __divsf3(fp_t a, fp_t b) {
     
@@ -167,3 +165,10 @@ __divsf3(fp_t a, fp_t b) {
         return fromRep(absResult | quotientSign);
     }
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) {
+  return __divsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divsi3.c b/lib/libcompiler_rt/divsi3.c
index bab4aefda30..3852e3990b5 100644
--- a/lib/libcompiler_rt/divsi3.c
+++ b/lib/libcompiler_rt/divsi3.c
@@ -16,8 +16,6 @@
 
 /* Returns: a / b */
 
-ARM_EABI_FNALIAS(idiv, divsi3)
-
 COMPILER_RT_ABI si_int
 __divsi3(si_int a, si_int b)
 {
@@ -35,3 +33,10 @@ __divsi3(si_int a, si_int b)
      */
     return ((su_int)a/(su_int)b ^ s_a) - s_a;    /* negate if s_a == -1 */
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) {
+  return __divsi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/divtc3.c b/lib/libcompiler_rt/divtc3.c
index 04693df471f..16e538ba4a3 100644
--- a/lib/libcompiler_rt/divtc3.c
+++ b/lib/libcompiler_rt/divtc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __divtc3(long double __a, long double __b, long double __c, long double __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@ __divtc3(long double __a, long double __b, long double __c, long double __d)
         __d = crt_scalbnl(__d, -__ilogbw);
     }
     long double __denom = __c * __c + __d * __d;
-    long double _Complex z;
-    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
             __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0.0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
             __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
-            __real__ z = 0.0 * (__a * __c + __b * __d);
-            __imag__ z = 0.0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c
index eccbf53366e..12aad3a42b7 100644
--- a/lib/libcompiler_rt/emutls.c
+++ b/lib/libcompiler_rt/emutls.c
@@ -7,7 +7,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include <pthread.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -15,6 +14,23 @@
 #include "int_lib.h"
 #include "int_util.h"
 
+typedef struct emutls_address_array {
+    uintptr_t size;  /* number of elements in the 'data' array */
+    void* data[];
+} emutls_address_array;
+
+static void emutls_shutdown(emutls_address_array *array);
+
+#ifndef _WIN32
+
+#include <pthread.h>
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t emutls_pthread_key;
+
+typedef unsigned int gcc_word __attribute__((mode(word)));
+typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+
 /* Default is not to use posix_memalign, so systems like Android
  * can use thread local data without heavier POSIX memory allocators.
  */
@@ -22,26 +38,6 @@
 #define EMUTLS_USE_POSIX_MEMALIGN 0
 #endif
 
-/* For every TLS variable xyz,
- * there is one __emutls_control variable named __emutls_v.xyz.
- * If xyz has non-zero initial value, __emutls_v.xyz's "value"
- * will point to __emutls_t.xyz, which has the initial value.
- */
-typedef unsigned int gcc_word __attribute__((mode(word)));
-typedef struct __emutls_control {
-    /* Must use gcc_word here, instead of size_t, to match GCC.  When
-       gcc_word is larger than size_t, the upper extra bits are all
-       zeros.  We can use variables of size_t to operate on size and
-       align.  */
-    gcc_word size;  /* size of the object in bytes */
-    gcc_word align;  /* alignment of the object in bytes */
-    union {
-        uintptr_t index;  /* data[index-1] is the object address */
-        void* address;  /* object address, when in single thread env */
-    } object;
-    void* value;  /* null or non-zero initial value for the object */
-} __emutls_control;
-
 static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
     void *base;
 #if EMUTLS_USE_POSIX_MEMALIGN
@@ -50,7 +46,7 @@ static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
 #else
     #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
     char* object;
-    if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+    if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
         abort();
     base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
                     & ~(uintptr_t)(align - 1));
@@ -69,10 +65,207 @@ static __inline void emutls_memalign_free(void *base) {
 #endif
 }
 
+static void emutls_key_destructor(void* ptr) {
+    emutls_shutdown((emutls_address_array*)ptr);
+    free(ptr);
+}
+
+static __inline void emutls_init(void) {
+    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+        abort();
+}
+
+static __inline void emutls_init_once(void) {
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once(&once, emutls_init);
+}
+
+static __inline void emutls_lock() {
+    pthread_mutex_lock(&emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    pthread_mutex_unlock(&emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
+#else
+
+#include <windows.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <assert.h>
+#include <immintrin.h>
+
+static LPCRITICAL_SECTION emutls_mutex;
+static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
+
+typedef uintptr_t gcc_word;
+typedef void * gcc_pointer;
+
+static void win_error(DWORD last_err, const char *hint) {
+    char *buffer = NULL;
+    if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                       NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
+        fprintf(stderr, "Windows error: %s\n", buffer);
+    } else {
+        fprintf(stderr, "Unkown Windows error: %s\n", hint);
+    }
+    LocalFree(buffer);
+}
+
+static __inline void win_abort(DWORD last_err, const char *hint) {
+    win_error(last_err, hint);
+    abort();
+}
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+    void *base = _aligned_malloc(size, align);
+    if (!base)
+        win_abort(GetLastError(), "_aligned_malloc");
+    return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+    _aligned_free(base);
+}
+
+static void emutls_exit(void) {
+    if (emutls_mutex) {
+        DeleteCriticalSection(emutls_mutex);
+        _aligned_free(emutls_mutex);
+        emutls_mutex = NULL;
+    }
+    if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
+        emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index));
+        TlsFree(emutls_tls_index);
+        emutls_tls_index = TLS_OUT_OF_INDEXES;
+    }
+}
+
+#pragma warning (push)
+#pragma warning (disable : 4100)
+static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
+    emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
+    if (!emutls_mutex) {
+        win_error(GetLastError(), "_aligned_malloc");
+        return FALSE;
+    }
+    InitializeCriticalSection(emutls_mutex);
+
+    emutls_tls_index = TlsAlloc();
+    if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
+        emutls_exit();
+        win_error(GetLastError(), "TlsAlloc");
+        return FALSE;
+    }
+    atexit(&emutls_exit);
+    return TRUE;
+}
+
+static __inline void emutls_init_once(void) {
+    static INIT_ONCE once;
+    InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
+}
+
+static __inline void emutls_lock() {
+    EnterCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    LeaveCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0)
+        win_abort(GetLastError(), "TlsSetValue");
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    LPVOID value = TlsGetValue(emutls_tls_index);
+    if (value == NULL) {
+        const DWORD err = GetLastError();
+        if (err != ERROR_SUCCESS)
+            win_abort(err, "TlsGetValue");
+    }
+    return (emutls_address_array*) value;
+}
+
+/* Provide atomic load/store functions for emutls_get_index if built with MSVC.
+ */
+#if !defined(__ATOMIC_RELEASE)
+
+enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
+
+static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
+    assert(type == __ATOMIC_ACQUIRE);
+#ifdef _WIN64
+    return (uintptr_t) _load_be_u64(ptr);
+#else
+    return (uintptr_t) _load_be_u32(ptr);
+#endif
+}
+
+static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
+    assert(type == __ATOMIC_RELEASE);
+#ifdef _WIN64
+    _store_be_u64(ptr, val);
+#else
+    _store_be_u32(ptr, val);
+#endif
+}
+
+#endif
+
+#pragma warning (pop)
+
+#endif
+
+static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
+
+/* Free the allocated TLS data
+ */
+static void emutls_shutdown(emutls_address_array *array) {
+    if (array) {
+        uintptr_t i;
+        for (i = 0; i < array->size; ++i) {
+            if (array->data[i])
+                emutls_memalign_free(array->data[i]);
+        }
+    }
+}
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+    /* Must use gcc_word here, instead of size_t, to match GCC.  When
+       gcc_word is larger than size_t, the upper extra bits are all
+       zeros.  We can use variables of size_t to operate on size and
+       align.  */
+    gcc_word size;  /* size of the object in bytes */
+    gcc_word align;  /* alignment of the object in bytes */
+    union {
+        uintptr_t index;  /* data[index-1] is the object address */
+        void* address;  /* object address, when in single thread env */
+    } object;
+    void* value;  /* null or non-zero initial value for the object */
+} __emutls_control;
+
 /* Emulated TLS objects are always allocated at run-time. */
 static __inline void *emutls_allocate_object(__emutls_control *control) {
     /* Use standard C types, check with gcc's emutls.o. */
-    typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
 
@@ -93,45 +286,19 @@ static __inline void *emutls_allocate_object(__emutls_control *control) {
     return base;
 }
 
-static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
-
-typedef struct emutls_address_array {
-    uintptr_t size;  /* number of elements in the 'data' array */
-    void* data[];
-} emutls_address_array;
-
-static pthread_key_t emutls_pthread_key;
-
-static void emutls_key_destructor(void* ptr) {
-    emutls_address_array* array = (emutls_address_array*)ptr;
-    uintptr_t i;
-    for (i = 0; i < array->size; ++i) {
-        if (array->data[i])
-            emutls_memalign_free(array->data[i]);
-    }
-    free(ptr);
-}
-
-static void emutls_init(void) {
-    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
-        abort();
-}
 
 /* Returns control->object.index; set index if not allocated yet. */
 static __inline uintptr_t emutls_get_index(__emutls_control *control) {
     uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
     if (!index) {
-        static pthread_once_t once = PTHREAD_ONCE_INIT;
-        pthread_once(&once, emutls_init);
-        pthread_mutex_lock(&emutls_mutex);
+        emutls_init_once();
+        emutls_lock();
         index = control->object.index;
         if (!index) {
             index = ++emutls_num_object;
             __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
         }
-        pthread_mutex_unlock(&emutls_mutex);
+        emutls_unlock();
     }
     return index;
 }
@@ -142,7 +309,7 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
     if (array == NULL)
         abort();
     array->size = size;
-    pthread_setspecific(emutls_pthread_key, (void*)array);
+    emutls_setspecific(array);
 }
 
 /* Returns the new 'data' array size, number of elements,
@@ -156,22 +323,29 @@ static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
     return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
 }
 
+/* Returns the size in bytes required for an emutls_address_array with
+ * N number of elements for data field.
+ */
+static __inline uintptr_t emutls_asize(uintptr_t N) {
+    return N * sizeof(void *) + sizeof(emutls_address_array);
+}
+
 /* Returns the thread local emutls_address_array.
  * Extends its size if necessary to hold address at index.
  */
 static __inline emutls_address_array *
 emutls_get_address_array(uintptr_t index) {
-    emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+    emutls_address_array* array = emutls_getspecific();
     if (array == NULL) {
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) malloc(emutls_asize(new_size));
         if (array)
             memset(array->data, 0, new_size * sizeof(void*));
         emutls_check_array_set_size(array, new_size);
     } else if (index > array->size) {
         uintptr_t orig_size = array->size;
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) realloc(array, emutls_asize(new_size));
         if (array)
             memset(array->data + orig_size, 0,
                    (new_size - orig_size) * sizeof(void*));
@@ -182,8 +356,8 @@ emutls_get_address_array(uintptr_t index) {
 
 void* __emutls_get_address(__emutls_control* control) {
     uintptr_t index = emutls_get_index(control);
-    emutls_address_array* array = emutls_get_address_array(index);
-    if (array->data[index - 1] == NULL)
-        array->data[index - 1] = emutls_allocate_object(control);
-    return array->data[index - 1];
+    emutls_address_array* array = emutls_get_address_array(index--);
+    if (array->data[index] == NULL)
+        array->data[index] = emutls_allocate_object(control);
+    return array->data[index];
 }
diff --git a/lib/libcompiler_rt/extendhfsf2.c b/lib/libcompiler_rt/extendhfsf2.c
index 27115a48c18..e7d9fde8abf 100644
--- a/lib/libcompiler_rt/extendhfsf2.c
+++ b/lib/libcompiler_rt/extendhfsf2.c
@@ -12,8 +12,6 @@
 #define DST_SINGLE
 #include "fp_extend_impl.inc"
 
-ARM_EABI_FNALIAS(h2f, extendhfsf2)
-
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
 COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
@@ -23,3 +21,10 @@ COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
 COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
     return __extendhfsf2(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_h2f(uint16_t a) {
+  return __extendhfsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/extendsfdf2.c b/lib/libcompiler_rt/extendsfdf2.c
index 7a267c2f47a..b9e7a7471a9 100644
--- a/lib/libcompiler_rt/extendsfdf2.c
+++ b/lib/libcompiler_rt/extendsfdf2.c
@@ -12,8 +12,13 @@
 #define DST_DOUBLE
 #include "fp_extend_impl.inc"
 
-ARM_EABI_FNALIAS(f2d, extendsfdf2)
-
 COMPILER_RT_ABI double __extendsfdf2(float a) {
     return __extendXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_f2d(float a) {
+  return __extendsfdf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ffssi2.c b/lib/libcompiler_rt/ffssi2.c
new file mode 100644
index 00000000000..e5180eff5e0
--- /dev/null
+++ b/lib/libcompiler_rt/ffssi2.c
@@ -0,0 +1,29 @@
+/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffssi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffssi2(si_int a)
+{
+    if (a == 0)
+    {
+        return 0;
+    }
+    return __builtin_ctz(a) + 1;
+}
diff --git a/lib/libcompiler_rt/fixdfdi.c b/lib/libcompiler_rt/fixdfdi.c
index 14283ef42e6..31d76df2825 100644
--- a/lib/libcompiler_rt/fixdfdi.c
+++ b/lib/libcompiler_rt/fixdfdi.c
@@ -10,7 +10,6 @@
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
-ARM_EABI_FNALIAS(d2lz, fixdfdi)
 
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
@@ -44,3 +43,15 @@ __fixdfdi(fp_t a) {
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_d2lz(fp_t a) {
+#else
+__aeabi_d2lz(double a) {
+#endif
+  return __fixdfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixdfsi.c b/lib/libcompiler_rt/fixdfsi.c
index 704e65bc43a..fc316dcd054 100644
--- a/lib/libcompiler_rt/fixdfsi.c
+++ b/lib/libcompiler_rt/fixdfsi.c
@@ -14,9 +14,14 @@ typedef si_int fixint_t;
 typedef su_int fixuint_t;
 #include "fp_fixint_impl.inc"
 
-ARM_EABI_FNALIAS(d2iz, fixdfsi)
-
 COMPILER_RT_ABI si_int
 __fixdfsi(fp_t a) {
     return __fixint(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
+  return __fixdfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixsfdi.c b/lib/libcompiler_rt/fixsfdi.c
index fab47e272a2..c43473637d6 100644
--- a/lib/libcompiler_rt/fixsfdi.c
+++ b/lib/libcompiler_rt/fixsfdi.c
@@ -11,8 +11,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(f2lz, fixsfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -45,3 +43,15 @@ __fixsfdi(fp_t a) {
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_f2lz(fp_t a) {
+#else
+__aeabi_f2lz(float a) {
+#endif
+  return __fixsfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixsfsi.c b/lib/libcompiler_rt/fixsfsi.c
index f045536d685..3276df96646 100644
--- a/lib/libcompiler_rt/fixsfsi.c
+++ b/lib/libcompiler_rt/fixsfsi.c
@@ -14,9 +14,14 @@ typedef si_int fixint_t;
 typedef su_int fixuint_t;
 #include "fp_fixint_impl.inc"
 
-ARM_EABI_FNALIAS(f2iz, fixsfsi)
-
 COMPILER_RT_ABI si_int
 __fixsfsi(fp_t a) {
     return __fixint(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
+  return __fixsfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunsdfdi.c b/lib/libcompiler_rt/fixunsdfdi.c
index 4b0bc9e1d05..b734409709b 100644
--- a/lib/libcompiler_rt/fixunsdfdi.c
+++ b/lib/libcompiler_rt/fixunsdfdi.c
@@ -11,8 +11,6 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -42,3 +40,15 @@ __fixunsdfdi(fp_t a) {
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_d2ulz(fp_t a) {
+#else
+__aeabi_d2ulz(double a) {
+#endif
+  return __fixunsdfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunsdfsi.c b/lib/libcompiler_rt/fixunsdfsi.c
index 232d342d77d..bb3d8e0f831 100644
--- a/lib/libcompiler_rt/fixunsdfsi.c
+++ b/lib/libcompiler_rt/fixunsdfsi.c
@@ -13,9 +13,14 @@
 typedef su_int fixuint_t;
 #include "fp_fixuint_impl.inc"
 
-ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
-
 COMPILER_RT_ABI su_int
 __fixunsdfsi(fp_t a) {
     return __fixuint(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) {
+  return __fixunsdfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunssfdi.c b/lib/libcompiler_rt/fixunssfdi.c
index f8ebab854f9..5d92245df0d 100644
--- a/lib/libcompiler_rt/fixunssfdi.c
+++ b/lib/libcompiler_rt/fixunssfdi.c
@@ -11,8 +11,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -43,3 +41,15 @@ __fixunssfdi(fp_t a) {
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_f2ulz(fp_t a) {
+#else
+__aeabi_f2ulz(float a) {
+#endif
+  return __fixunssfdi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/fixunssfsi.c b/lib/libcompiler_rt/fixunssfsi.c
index cc2b05bd84f..91d5e8ae5d7 100644
--- a/lib/libcompiler_rt/fixunssfsi.c
+++ b/lib/libcompiler_rt/fixunssfsi.c
@@ -17,9 +17,14 @@
 typedef su_int fixuint_t;
 #include "fp_fixuint_impl.inc"
 
-ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
-
 COMPILER_RT_ABI su_int
 __fixunssfsi(fp_t a) {
     return __fixuint(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) {
+  return __fixunssfsi(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatdidf.c b/lib/libcompiler_rt/floatdidf.c
index 2b023ad08be..681fecef968 100644
--- a/lib/libcompiler_rt/floatdidf.c
+++ b/lib/libcompiler_rt/floatdidf.c
@@ -22,8 +22,6 @@
 
 /* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
 
-ARM_EABI_FNALIAS(l2d, floatdidf)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
@@ -105,3 +103,10 @@ __floatdidf(di_int a)
     return fb.f;
 }
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_l2d(di_int a) {
+  return __floatdidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatdisf.c b/lib/libcompiler_rt/floatdisf.c
index 3e47580ef57..dd548165c37 100644
--- a/lib/libcompiler_rt/floatdisf.c
+++ b/lib/libcompiler_rt/floatdisf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(l2f, floatdisf)
-
 COMPILER_RT_ABI float
 __floatdisf(di_int a)
 {
@@ -78,3 +76,10 @@ __floatdisf(di_int a)
            ((su_int)a & 0x007FFFFF);   /* mantissa */
     return fb.f;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_l2f(di_int a) {
+  return __floatdisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatsidf.c b/lib/libcompiler_rt/floatsidf.c
index 1cf99b782a6..2ae395bdc1d 100644
--- a/lib/libcompiler_rt/floatsidf.c
+++ b/lib/libcompiler_rt/floatsidf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(i2d, floatsidf)
-
 COMPILER_RT_ABI fp_t
 __floatsidf(int a) {
     
@@ -51,3 +49,10 @@ __floatsidf(int a) {
     // Insert the sign bit and return
     return fromRep(result | sign);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2d(int a) {
+  return __floatsidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatsisf.c b/lib/libcompiler_rt/floatsisf.c
index 467dd1d1eaf..08891fcdf20 100644
--- a/lib/libcompiler_rt/floatsisf.c
+++ b/lib/libcompiler_rt/floatsisf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(i2f, floatsisf)
-
 COMPILER_RT_ABI fp_t
 __floatsisf(int a) {
     
@@ -57,3 +55,10 @@ __floatsisf(int a) {
     // Insert the sign bit and return
     return fromRep(result | sign);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2f(int a) {
+  return __floatsisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floattitf.c b/lib/libcompiler_rt/floattitf.c
new file mode 100644
index 00000000000..994fded3947
--- /dev/null
+++ b/lib/libcompiler_rt/floattitf.c
@@ -0,0 +1,82 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(ti_int) * CHAR_BIT;
+    const ti_int s = a >> (N-1);
+    a = (a ^ s) - s;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (s & 0x8000000000000000LL)           /* sign */
+                  | (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/libcompiler_rt/floatundidf.c b/lib/libcompiler_rt/floatundidf.c
index cfd3a7a3b33..6c1a931ef2f 100644
--- a/lib/libcompiler_rt/floatundidf.c
+++ b/lib/libcompiler_rt/floatundidf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ul2d, floatundidf)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
@@ -104,3 +102,10 @@ __floatundidf(du_int a)
     return fb.f;
 }
 #endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_ul2d(du_int a) {
+  return __floatundidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatundisf.c b/lib/libcompiler_rt/floatundisf.c
index 713a44abc8b..86841a75dc6 100644
--- a/lib/libcompiler_rt/floatundisf.c
+++ b/lib/libcompiler_rt/floatundisf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ul2f, floatundisf)
-
 COMPILER_RT_ABI float
 __floatundisf(du_int a)
 {
@@ -75,3 +73,10 @@ __floatundisf(du_int a)
            ((su_int)a & 0x007FFFFF);  /* mantissa */
     return fb.f;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_ul2f(du_int a) {
+  return __floatundisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatunsidf.c b/lib/libcompiler_rt/floatunsidf.c
index 445e18041c4..8d4807194f0 100644
--- a/lib/libcompiler_rt/floatunsidf.c
+++ b/lib/libcompiler_rt/floatunsidf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ui2d, floatunsidf)
-
 COMPILER_RT_ABI fp_t
 __floatunsidf(unsigned int a) {
     
@@ -40,3 +38,10 @@ __floatunsidf(unsigned int a) {
     result += (rep_t)(exponent + exponentBias) << significandBits;
     return fromRep(result);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) {
+  return __floatunsidf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatunsisf.c b/lib/libcompiler_rt/floatunsisf.c
index ea6f161adc0..f194c046d2f 100644
--- a/lib/libcompiler_rt/floatunsisf.c
+++ b/lib/libcompiler_rt/floatunsisf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ui2f, floatunsisf)
-
 COMPILER_RT_ABI fp_t
 __floatunsisf(unsigned int a) {
     
@@ -48,3 +46,10 @@ __floatunsisf(unsigned int a) {
     result += (rep_t)(exponent + exponentBias) << significandBits;
     return fromRep(result);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) {
+  return __floatunsisf(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/floatuntitf.c b/lib/libcompiler_rt/floatuntitf.c
new file mode 100644
index 00000000000..e2518c93f23
--- /dev/null
+++ b/lib/libcompiler_rt/floatuntitf.c
@@ -0,0 +1,79 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is a IEEE 128 bit floating point type
+ *             tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+    if (a == 0)
+        return 0.0;
+    const unsigned N = sizeof(tu_int) * CHAR_BIT;
+    int sd = N - __clzti2(a);  /* number of significant digits */
+    int e = sd - 1;            /* exponent */
+    if (sd > LDBL_MANT_DIG) {
+        /*  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+         *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+         *                                                12345678901234567890123456
+         *  1 = msb 1 bit
+         *  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+         *  Q = bit LDBL_MANT_DIG bits to the right of 1
+         *  R = "or" of all bits to the right of Q
+         */
+        switch (sd) {
+        case LDBL_MANT_DIG + 1:
+            a <<= 1;
+            break;
+        case LDBL_MANT_DIG + 2:
+            break;
+        default:
+            a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+                ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+        };
+        /* finish: */
+        a |= (a & 4) != 0;  /* Or P into R */
+        ++a;  /* round - this step may add a significant bit */
+        a >>= 2;  /* dump Q and R */
+        /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+        if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+            a >>= 1;
+            ++e;
+        }
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    } else {
+        a <<= (LDBL_MANT_DIG - sd);
+        /* a is now rounded to LDBL_MANT_DIG bits */
+    }
+
+    long_double_bits fb;
+    fb.u.high.all = (du_int)(e + 16383) << 48            /* exponent */
+                  | ((a >> 64) & 0x0000ffffffffffffLL);  /* significand */
+    fb.u.low.all = (du_int)(a);
+    return fb.f;
+}
+
+#endif
diff --git a/lib/libcompiler_rt/i386/Makefile.mk b/lib/libcompiler_rt/i386/Makefile.mk
deleted file mode 100644
index f3776a02c0d..00000000000
--- a/lib/libcompiler_rt/i386/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := i386
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/int_lib.h b/lib/libcompiler_rt/int_lib.h
index 8dfe5672d13..9a8092d50d8 100644
--- a/lib/libcompiler_rt/int_lib.h
+++ b/lib/libcompiler_rt/int_lib.h
@@ -30,14 +30,17 @@
 /* ABI macro definitions */
 
 #if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name)         \
-  void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+# ifdef COMPILER_RT_ARMHF_TARGET
+#   define COMPILER_RT_ABI
+# else
+#   define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+# endif
 #else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
 # define COMPILER_RT_ABI
 #endif
 
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
 #ifdef _MSC_VER
 #define ALWAYS_INLINE __forceinline
 #define NOINLINE __declspec(noinline)
@@ -91,14 +94,14 @@ COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
 #include <intrin.h>
 
 uint32_t __inline __builtin_ctz(uint32_t value) {
-  uint32_t trailing_zero = 0;
+  unsigned long trailing_zero = 0;
   if (_BitScanForward(&trailing_zero, value))
     return trailing_zero;
   return 32;
 }
 
 uint32_t __inline __builtin_clz(uint32_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
   if (_BitScanReverse(&leading_zero, value))
     return 31 - leading_zero;
   return 32;
@@ -106,7 +109,7 @@ uint32_t __inline __builtin_clz(uint32_t value) {
 
 #if defined(_M_ARM) || defined(_M_X64)
 uint32_t __inline __builtin_clzll(uint64_t value) {
-  uint32_t leading_zero = 0;
+  unsigned long leading_zero = 0;
   if (_BitScanReverse64(&leading_zero, value))
     return 63 - leading_zero;
   return 64;
diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h
index 660385ecd6a..a92238c5b73 100644
--- a/lib/libcompiler_rt/int_types.h
+++ b/lib/libcompiler_rt/int_types.h
@@ -60,9 +60,7 @@ typedef union
     }s;
 } udwords;
 
-/* MIPS64 issue: PR 20098 */
-#if (defined(__LP64__) || defined(__wasm__)) && \
-    !(defined(__mips__) && defined(__clang__))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
 #define CRT_HAS_128BIT
 #endif
 
diff --git a/lib/libcompiler_rt/int_util.c b/lib/libcompiler_rt/int_util.c
index 420d1e237aa..de87410dbca 100644
--- a/lib/libcompiler_rt/int_util.c
+++ b/lib/libcompiler_rt/int_util.c
@@ -45,6 +45,16 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) {
   __assert_rtn(function, file, line, "libcompiler_rt abort");
 }
 
+#elif __Fuchsia__
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  __builtin_trap();
+}
+
 #else
 
 /* Get the system definition of abort() */
diff --git a/lib/libcompiler_rt/lshrdi3.c b/lib/libcompiler_rt/lshrdi3.c
index 6b1ea923b77..becbbef4eb0 100644
--- a/lib/libcompiler_rt/lshrdi3.c
+++ b/lib/libcompiler_rt/lshrdi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(llsr, lshrdi3)
-
 COMPILER_RT_ABI di_int
 __lshrdi3(di_int a, si_int b)
 {
@@ -41,3 +39,10 @@ __lshrdi3(di_int a, si_int b)
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) {
+  return __lshrdi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/mingw_fixfloat.c b/lib/libcompiler_rt/mingw_fixfloat.c
new file mode 100644
index 00000000000..c462e0dbf65
--- /dev/null
+++ b/lib/libcompiler_rt/mingw_fixfloat.c
@@ -0,0 +1,36 @@
+/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI di_int __fixdfdi(double a);
+COMPILER_RT_ABI di_int __fixsfdi(float a);
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+COMPILER_RT_ABI double __floatdidf(di_int a);
+COMPILER_RT_ABI float __floatdisf(di_int a);
+COMPILER_RT_ABI double __floatundidf(du_int a);
+COMPILER_RT_ABI float __floatundisf(du_int a);
+
+COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
+
+COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
+
+COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
+
+COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
+
+COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
+
+COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
+
+COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
+
+COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
diff --git a/lib/libcompiler_rt/muldf3.c b/lib/libcompiler_rt/muldf3.c
index 1eb733849e5..59a60190eba 100644
--- a/lib/libcompiler_rt/muldf3.c
+++ b/lib/libcompiler_rt/muldf3.c
@@ -15,8 +15,13 @@
 #define DOUBLE_PRECISION
 #include "fp_mul_impl.inc"
 
-ARM_EABI_FNALIAS(dmul, muldf3)
-
 COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
     return __mulXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) {
+  return __muldf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/muldi3.c b/lib/libcompiler_rt/muldi3.c
index 2dae44c11b9..6818a9e2f72 100644
--- a/lib/libcompiler_rt/muldi3.c
+++ b/lib/libcompiler_rt/muldi3.c
@@ -40,8 +40,6 @@ __muldsi3(su_int a, su_int b)
 
 /* Returns: a * b */
 
-ARM_EABI_FNALIAS(lmul, muldi3)
-
 COMPILER_RT_ABI di_int
 __muldi3(di_int a, di_int b)
 {
@@ -54,3 +52,10 @@ __muldi3(di_int a, di_int b)
     r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
     return r.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) {
+  return __muldi3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/mulsf3.c b/lib/libcompiler_rt/mulsf3.c
index 478b3bc0e0e..f141af1acc5 100644
--- a/lib/libcompiler_rt/mulsf3.c
+++ b/lib/libcompiler_rt/mulsf3.c
@@ -15,8 +15,13 @@
 #define SINGLE_PRECISION
 #include "fp_mul_impl.inc"
 
-ARM_EABI_FNALIAS(fmul, mulsf3)
-
 COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
     return __mulXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) {
+  return __mulsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/negdf2.c b/lib/libcompiler_rt/negdf2.c
index d634b421cb7..5e2544cdb4b 100644
--- a/lib/libcompiler_rt/negdf2.c
+++ b/lib/libcompiler_rt/negdf2.c
@@ -14,9 +14,14 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(dneg, negdf2)
-
 COMPILER_RT_ABI fp_t
 __negdf2(fp_t a) {
     return fromRep(toRep(a) ^ signBit);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) {
+  return __negdf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/negsf2.c b/lib/libcompiler_rt/negsf2.c
index 29c17be4145..f90b3433568 100644
--- a/lib/libcompiler_rt/negsf2.c
+++ b/lib/libcompiler_rt/negsf2.c
@@ -14,9 +14,14 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fneg, negsf2)
-
 COMPILER_RT_ABI fp_t
 __negsf2(fp_t a) {
     return fromRep(toRep(a) ^ signBit);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) {
+  return __negsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/ppc/Makefile.mk b/lib/libcompiler_rt/ppc/Makefile.mk
deleted file mode 100644
index 0adc623aa04..00000000000
--- a/lib/libcompiler_rt/ppc/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := ppc
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/subdf3.c b/lib/libcompiler_rt/subdf3.c
index 7a79e5e7765..38340dfab1a 100644
--- a/lib/libcompiler_rt/subdf3.c
+++ b/lib/libcompiler_rt/subdf3.c
@@ -15,11 +15,15 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(dsub, subdf3)
-
 // Subtraction; flip the sign bit of b and add.
 COMPILER_RT_ABI fp_t
 __subdf3(fp_t a, fp_t b) {
     return __adddf3(a, fromRep(toRep(b) ^ signBit));
 }
 
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) {
+  return __subdf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/subsf3.c b/lib/libcompiler_rt/subsf3.c
index c3b85144af4..34276b1447b 100644
--- a/lib/libcompiler_rt/subsf3.c
+++ b/lib/libcompiler_rt/subsf3.c
@@ -15,11 +15,15 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fsub, subsf3)
-
 // Subtraction; flip the sign bit of b and add.
 COMPILER_RT_ABI fp_t
 __subsf3(fp_t a, fp_t b) {
     return __addsf3(a, fromRep(toRep(b) ^ signBit));
 }
 
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) {
+  return __subsf3(a, b);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncdfhf2.c b/lib/libcompiler_rt/truncdfhf2.c
index 17195cd9e79..4bb71aa178a 100644
--- a/lib/libcompiler_rt/truncdfhf2.c
+++ b/lib/libcompiler_rt/truncdfhf2.c
@@ -11,8 +11,13 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(d2h, truncdfhf2)
-
 COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
     return __truncXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_d2h(double a) {
+  return __truncdfhf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncdfsf2.c b/lib/libcompiler_rt/truncdfsf2.c
index 46ec11dccd7..8bf58bb23a3 100644
--- a/lib/libcompiler_rt/truncdfsf2.c
+++ b/lib/libcompiler_rt/truncdfsf2.c
@@ -11,8 +11,13 @@
 #define DST_SINGLE
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(d2f, truncdfsf2)
-
 COMPILER_RT_ABI float __truncdfsf2(double a) {
     return __truncXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_d2f(double a) {
+  return __truncdfsf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/truncsfhf2.c b/lib/libcompiler_rt/truncsfhf2.c
index 9d61895bfd8..f6ce1fa1de0 100644
--- a/lib/libcompiler_rt/truncsfhf2.c
+++ b/lib/libcompiler_rt/truncsfhf2.c
@@ -11,8 +11,6 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(f2h, truncsfhf2)
-
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
 COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
@@ -22,3 +20,10 @@ COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
 COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
     return __truncsfhf2(a);
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_f2h(float a) {
+  return __truncsfhf2(a);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/udivsi3.c b/lib/libcompiler_rt/udivsi3.c
index 5d0140cc3e7..8eccf102cc9 100644
--- a/lib/libcompiler_rt/udivsi3.c
+++ b/lib/libcompiler_rt/udivsi3.c
@@ -18,8 +18,6 @@
 
 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
 
-ARM_EABI_FNALIAS(uidiv, udivsi3)
-
 /* This function should not call __divsi3! */
 COMPILER_RT_ABI su_int
 __udivsi3(su_int n, su_int d)
@@ -64,3 +62,10 @@ __udivsi3(su_int n, su_int d)
     q = (q << 1) | carry;
     return q;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) {
+  return __udivsi3(n, d);
+}
+#endif
+
diff --git a/lib/libcompiler_rt/x86_64/Makefile.mk b/lib/libcompiler_rt/x86_64/Makefile.mk
deleted file mode 100644
index 83848dddd96..00000000000
--- a/lib/libcompiler_rt/x86_64/Makefile.mk
+++ /dev/null
@@ -1,20 +0,0 @@
-#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs := 
-OnlyArchs := x86_64 x86_64h
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/libcompiler_rt/x86_64/floatdidf.c b/lib/libcompiler_rt/x86_64/floatdidf.c
index 388404e5e08..dead0ed42c6 100644
--- a/lib/libcompiler_rt/x86_64/floatdidf.c
+++ b/lib/libcompiler_rt/x86_64/floatdidf.c
@@ -4,7 +4,7 @@
 
 /* double __floatdidf(di_int a); */
 
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
 
 #include "../int_lib.h"
 
diff --git a/lib/libcompiler_rt/x86_64/floatdisf.c b/lib/libcompiler_rt/x86_64/floatdisf.c
index 96c3728e92c..99d5621c632 100644
--- a/lib/libcompiler_rt/x86_64/floatdisf.c
+++ b/lib/libcompiler_rt/x86_64/floatdisf.c
@@ -2,7 +2,7 @@
  * License. See LICENSE.TXT for details.
  */
 
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
 
 #include "../int_lib.h"