From: patrick Date: Tue, 26 Dec 2017 20:59:44 +0000 (+0000) Subject: Update to compiler-rt 5.0.1. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=c72a1c0960669bfd1eb87723f902bc05ee727707;p=openbsd Update to compiler-rt 5.0.1. ok kettenis@ --- diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt index 44a660f5d49..f0d3f50714c 100644 --- a/lib/libcompiler_rt/CMakeLists.txt +++ b/lib/libcompiler_rt/CMakeLists.txt @@ -13,6 +13,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) "${CMAKE_SOURCE_DIR}/../../cmake/Modules") include(base-config-ix) include(CompilerRTUtils) + + load_llvm_config() + construct_compiler_rt_default_triple() + if(APPLE) include(CompilerRTDarwinUtils) endif() @@ -38,7 +42,8 @@ set(GENERIC_SOURCES ashlti3.c ashrdi3.c ashrti3.c - clear_cache.c + bswapdi2.c + bswapsi2.c clzdi2.c clzsi2.c clzti2.c @@ -62,11 +67,10 @@ set(GENERIC_SOURCES divti3.c divtf3.c divxc3.c - enable_execute_stack.c - eprintf.c extendsfdf2.c extendhfsf2.c ffsdi2.c + ffssi2.c ffsti2.c fixdfdi.c fixdfsi.c @@ -128,6 +132,7 @@ set(GENERIC_SOURCES negvdi2.c negvsi2.c negvti2.c + os_version_check.c paritydi2.c paritysi2.c parityti2.c @@ -160,20 +165,43 @@ set(GENERIC_SOURCES umodsi3.c umodti3.c) -if(COMPILER_RT_SUPPORTS_ATOMIC_KEYWORD) +set(GENERIC_TF_SOURCES + comparetf2.c + extenddftf2.c + extendsftf2.c + fixtfdi.c + fixtfsi.c + fixtfti.c + fixunstfdi.c + fixunstfsi.c + fixunstfti.c + floatditf.c + floatsitf.c + floattitf.c + floatunditf.c + floatunsitf.c + floatuntitf.c + multc3.c + trunctfdf2.c + trunctfsf2.c) + +option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN + "Skip the atomic builtin (this may be needed if system headers are unavailable)" + Off) + +if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD) set(GENERIC_SOURCES ${GENERIC_SOURCES} - atomic.c) + emutls.c + enable_execute_stack.c + eprintf.c) endif() -set(MSVC_SOURCES - divsc3.c - divdc3.c - divxc3.c - mulsc3.c - muldc3.c - mulxc3.c) - +if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + atomic.c) +endif() if(APPLE) set(GENERIC_SOURCES @@ -186,18 +214,18 @@ if(APPLE) atomic_thread_fence.c) endif() -if(NOT WIN32 OR MINGW) - set(GENERIC_SOURCES - ${GENERIC_SOURCES} - emutls.c) -endif() - if (HAVE_UNWIND_H) set(GENERIC_SOURCES ${GENERIC_SOURCES} gcc_personality_v0.c) endif () +if (NOT FUCHSIA) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + clear_cache.c) +endif() + if (NOT MSVC) set(x86_64_SOURCES x86_64/chkstk.S @@ -254,15 +282,54 @@ else () # MSVC x86_64/floatdidf.c x86_64/floatdisf.c x86_64/floatdixf.c - ${MSVC_SOURCES}) + ${GENERIC_SOURCES}) set(x86_64h_SOURCES ${x86_64_SOURCES}) - set(i386_SOURCES ${MSVC_SOURCES}) + set(i386_SOURCES ${GENERIC_SOURCES}) set(i686_SOURCES ${i386_SOURCES}) endif () # if (NOT MSVC) set(arm_SOURCES - arm/adddf3vfp.S - arm/addsf3vfp.S + arm/bswapdi2.S + arm/bswapsi2.S + arm/clzdi2.S + arm/clzsi2.S + arm/comparesf2.S + arm/divmodsi4.S + arm/divsi3.S + arm/modsi3.S + arm/sync_fetch_and_add_4.S + arm/sync_fetch_and_add_8.S + arm/sync_fetch_and_and_4.S + arm/sync_fetch_and_and_8.S + arm/sync_fetch_and_max_4.S + arm/sync_fetch_and_max_8.S + arm/sync_fetch_and_min_4.S + arm/sync_fetch_and_min_8.S + arm/sync_fetch_and_nand_4.S + arm/sync_fetch_and_nand_8.S + arm/sync_fetch_and_or_4.S + arm/sync_fetch_and_or_8.S + arm/sync_fetch_and_sub_4.S + arm/sync_fetch_and_sub_8.S + arm/sync_fetch_and_umax_4.S + arm/sync_fetch_and_umax_8.S + arm/sync_fetch_and_umin_4.S + arm/sync_fetch_and_umin_8.S + arm/sync_fetch_and_xor_4.S + arm/sync_fetch_and_xor_8.S + arm/udivmodsi4.S + arm/udivsi3.S + arm/umodsi3.S + ${GENERIC_SOURCES}) + +set(thumb1_SOURCES + arm/divsi3.S + arm/udivsi3.S + arm/comparesf2.S + arm/addsf3.S + ${GENERIC_SOURCES}) + +set(arm_EABI_SOURCES arm/aeabi_cdcmp.S arm/aeabi_cdcmpeq_check_nan.c arm/aeabi_cfcmp.S @@ -279,16 +346,21 @@ set(arm_SOURCES arm/aeabi_memmove.S arm/aeabi_memset.S arm/aeabi_uidivmod.S - arm/aeabi_uldivmod.S - arm/bswapdi2.S - arm/bswapsi2.S - arm/clzdi2.S - arm/clzsi2.S - arm/comparesf2.S + arm/aeabi_uldivmod.S) + +set(arm_Thumb1_JT_SOURCES + arm/switch16.S + arm/switch32.S + arm/switch8.S + arm/switchu8.S) +set(arm_Thumb1_SjLj_EH_SOURCES + arm/restore_vfp_d8_d15_regs.S + arm/save_vfp_d8_d15_regs.S) +set(arm_Thumb1_VFPv2_SOURCES + arm/adddf3vfp.S + arm/addsf3vfp.S arm/divdf3vfp.S - arm/divmodsi4.S arm/divsf3vfp.S - arm/divsi3.S arm/eqdf2vfp.S arm/eqsf2vfp.S arm/extendsfdf2vfp.S @@ -308,67 +380,64 @@ set(arm_SOURCES arm/lesf2vfp.S arm/ltdf2vfp.S arm/ltsf2vfp.S - arm/modsi3.S arm/muldf3vfp.S arm/mulsf3vfp.S arm/nedf2vfp.S arm/negdf2vfp.S arm/negsf2vfp.S arm/nesf2vfp.S - arm/restore_vfp_d8_d15_regs.S - arm/save_vfp_d8_d15_regs.S arm/subdf3vfp.S arm/subsf3vfp.S - arm/switch16.S - arm/switch32.S - arm/switch8.S - arm/switchu8.S - arm/sync_fetch_and_add_4.S - arm/sync_fetch_and_add_8.S - arm/sync_fetch_and_and_4.S - arm/sync_fetch_and_and_8.S - arm/sync_fetch_and_max_4.S - arm/sync_fetch_and_max_8.S - arm/sync_fetch_and_min_4.S - arm/sync_fetch_and_min_8.S - arm/sync_fetch_and_nand_4.S - arm/sync_fetch_and_nand_8.S - arm/sync_fetch_and_or_4.S - arm/sync_fetch_and_or_8.S - arm/sync_fetch_and_sub_4.S - arm/sync_fetch_and_sub_8.S - arm/sync_fetch_and_umax_4.S - arm/sync_fetch_and_umax_8.S - arm/sync_fetch_and_umin_4.S - arm/sync_fetch_and_umin_8.S - arm/sync_fetch_and_xor_4.S - arm/sync_fetch_and_xor_8.S - arm/sync_synchronize.S arm/truncdfsf2vfp.S - arm/udivmodsi4.S - arm/udivsi3.S - arm/umodsi3.S arm/unorddf2vfp.S - arm/unordsf2vfp.S - ${GENERIC_SOURCES}) + arm/unordsf2vfp.S) +set(arm_Thumb1_icache_SOURCES + arm/sync_synchronize.S) +set(arm_Thumb1_SOURCES + ${arm_Thumb1_JT_SOURCES} + ${arm_Thumb1_SjLj_EH_SOURCES} + ${arm_Thumb1_VFPv2_SOURCES} + ${arm_Thumb1_icache_SOURCES}) + +if(MINGW) + set(arm_SOURCES + arm/aeabi_idivmod.S + arm/aeabi_ldivmod.S + arm/aeabi_uidivmod.S + arm/aeabi_uldivmod.S + divmoddi4.c + divmodsi4.c + divdi3.c + divsi3.c + fixdfdi.c + fixsfdi.c + fixunsdfdi.c + fixunssfdi.c + floatdidf.c + floatdisf.c + floatundidf.c + floatundisf.c + mingw_fixfloat.c + moddi3.c + udivmoddi4.c + udivmodsi4.c + udivsi3.c + umoddi3.c + emutls.c) +elseif(NOT WIN32) + # TODO the EABI sources should only be added to EABI targets + set(arm_SOURCES + ${arm_SOURCES} + ${arm_EABI_SOURCES} + ${arm_Thumb1_SOURCES}) + + set(thumb1_SOURCES + ${thumb1_SOURCES} + ${arm_EABI_SOURCES}) +endif() set(aarch64_SOURCES - comparetf2.c - extenddftf2.c - extendsftf2.c - fixtfdi.c - fixtfsi.c - fixtfti.c - fixunstfdi.c - fixunstfsi.c - fixunstfti.c - floatditf.c - floatsitf.c - floatunditf.c - floatunsitf.c - multc3.c - trunctfdf2.c - trunctfsf2.c + ${GENERIC_TF_SOURCES} ${GENERIC_SOURCES}) set(armhf_SOURCES ${arm_SOURCES}) @@ -378,14 +447,16 @@ set(armv7k_SOURCES ${arm_SOURCES}) set(arm64_SOURCES ${aarch64_SOURCES}) # macho_embedded archs -set(armv6m_SOURCES ${GENERIC_SOURCES}) +set(armv6m_SOURCES ${thumb1_SOURCES}) set(armv7m_SOURCES ${arm_SOURCES}) set(armv7em_SOURCES ${arm_SOURCES}) set(mips_SOURCES ${GENERIC_SOURCES}) set(mipsel_SOURCES ${mips_SOURCES}) -set(mips64_SOURCES ${mips_SOURCES}) -set(mips64el_SOURCES ${mips_SOURCES}) +set(mips64_SOURCES ${GENERIC_TF_SOURCES} + ${mips_SOURCES}) +set(mips64el_SOURCES ${GENERIC_TF_SOURCES} + ${mips_SOURCES}) set(wasm32_SOURCES ${GENERIC_SOURCES}) set(wasm64_SOURCES ${GENERIC_SOURCES}) @@ -398,26 +469,57 @@ if (APPLE) add_subdirectory(macho_embedded) darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS}) else () - append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99) + set(BUILTIN_CFLAGS "") + + append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS) + + # These flags would normally be added to CMAKE_C_FLAGS by the llvm + # cmake step. Add them manually if this is a standalone build. + if(COMPILER_RT_STANDALONE_BUILD) + append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS) + append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS) + append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS) + if(NOT COMPILER_RT_DEBUG) + append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS) + endif() + endif() + + set(BUILTIN_DEFS "") + + append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS) foreach (arch ${BUILTIN_SUPPORTED_ARCH}) if (CAN_TARGET_${arch}) + # NOTE: some architectures (e.g. i386) have multiple names. Ensure that + # we catch them all. + set(_arch ${arch}) + if("${arch}" STREQUAL "i686") + set(_arch "i386|i686") + endif() + # Filter out generic versions of routines that are re-implemented in # architecture specific manner. This prevents multiple definitions of the # same symbols, making the symbol selection non-deterministic. foreach (_file ${${arch}_SOURCES}) - if (${_file} MATCHES ${arch}/*) + if (${_file} MATCHES ${_arch}/*) get_filename_component(_name ${_file} NAME) string(REPLACE ".S" ".c" _cname "${_name}") list(REMOVE_ITEM ${arch}_SOURCES ${_cname}) endif () endforeach () + # Needed for clear_cache on debug mode, due to r7's usage in inline asm. + # Release mode already sets it via -O2/3, Debug mode doesn't. + if (${arch} STREQUAL "armhf") + list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET) + endif() + add_compiler_rt_runtime(clang_rt.builtins STATIC ARCHS ${arch} SOURCES ${${arch}_SOURCES} - CFLAGS ${maybe_stdc99} + DEFS ${BUILTIN_DEFS} + CFLAGS ${BUILTIN_CFLAGS} PARENT_TARGET builtins) endif () endforeach () diff --git a/lib/libcompiler_rt/Makefile b/lib/libcompiler_rt/Makefile index 3791940ef38..a12a1fa9bc4 100644 --- a/lib/libcompiler_rt/Makefile +++ b/lib/libcompiler_rt/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.10 2017/09/08 19:04:00 naddy Exp $ +# $OpenBSD: Makefile,v 1.11 2017/12/26 20:59:44 patrick Exp $ .include @@ -41,6 +41,8 @@ GEN_SRCS= absvdi2 \ ashrdi3 \ ashrti3 \ atomic \ + bswapdi2 \ + bswapsi2 \ clear_cache \ clzdi2 \ clzsi2 \ @@ -71,6 +73,7 @@ GEN_SRCS= absvdi2 \ extendsfdf2 \ extendhfsf2 \ ffsdi2 \ + ffssi2 \ ffsti2 \ fixdfdi \ fixdfsi \ @@ -195,8 +198,10 @@ SRCS+= comparetf2.c \ fixunstfti.c \ floatditf.c \ floatsitf.c \ - floatunsitf.c \ + floattitf.c \ floatunditf.c \ + floatunsitf.c \ + floatuntitf.c \ multc3.c \ trunctfdf2.c \ trunctfsf2.c @@ -220,8 +225,6 @@ SRCS+= aeabi_cdcmp.S \ aeabi_memset.S \ aeabi_uidivmod.S \ aeabi_uldivmod.S \ - bswapdi2.S \ - bswapsi2.S \ switch16.S \ switch32.S \ switch8.S \ diff --git a/lib/libcompiler_rt/Makefile.mk b/lib/libcompiler_rt/Makefile.mk deleted file mode 100644 index 00e2f53fc40..00000000000 --- a/lib/libcompiler_rt/Makefile.mk +++ /dev/null @@ -1,25 +0,0 @@ -#===- lib/builtins/Makefile.mk -----------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := - -# Add arch specific optimized implementations. -SubDirs += i386 ppc x86_64 arm armv6m - -# Add ARM64 dir. -SubDirs += arm64 - -# Define the variables for this specific directory. -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) -Implementation := Generic - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard $(Dir)/*.h) diff --git a/lib/libcompiler_rt/README.txt b/lib/libcompiler_rt/README.txt index ad36e4e5279..e603dfa0535 100644 --- a/lib/libcompiler_rt/README.txt +++ b/lib/libcompiler_rt/README.txt @@ -45,6 +45,7 @@ si_int __ctzsi2(si_int a); // count trailing zeros si_int __ctzdi2(di_int a); // count trailing zeros si_int __ctzti2(ti_int a); // count trailing zeros +si_int __ffssi2(si_int a); // find least significant 1 bit si_int __ffsdi2(di_int a); // find least significant 1 bit si_int __ffsti2(ti_int a); // find least significant 1 bit @@ -56,8 +57,8 @@ si_int __popcountsi2(si_int a); // bit population si_int __popcountdi2(di_int a); // bit population si_int __popcountti2(ti_int a); // bit population -uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm only -uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only +uint32_t __bswapsi2(uint32_t a); // a byteswapped +uint64_t __bswapdi2(uint64_t a); // a byteswapped // Integral arithmetic diff --git a/lib/libcompiler_rt/adddf3.c b/lib/libcompiler_rt/adddf3.c index 8b7aae0a6f8..c528e9e21f5 100644 --- a/lib/libcompiler_rt/adddf3.c +++ b/lib/libcompiler_rt/adddf3.c @@ -15,8 +15,13 @@ #define DOUBLE_PRECISION #include "fp_add_impl.inc" -ARM_EABI_FNALIAS(dadd, adddf3) - COMPILER_RT_ABI double __adddf3(double a, double b){ return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_dadd(double a, double b) { + return __adddf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/addsf3.c b/lib/libcompiler_rt/addsf3.c index 0f5d6ea4097..fe570687a25 100644 --- a/lib/libcompiler_rt/addsf3.c +++ b/lib/libcompiler_rt/addsf3.c @@ -15,8 +15,13 @@ #define SINGLE_PRECISION #include "fp_add_impl.inc" -ARM_EABI_FNALIAS(fadd, addsf3) - COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_fadd(float a, float b) { + return __addsf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/arm/Makefile.mk b/lib/libcompiler_rt/arm/Makefile.mk deleted file mode 100644 index ed2e8323e39..00000000000 --- a/lib/libcompiler_rt/arm/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/arm/adddf3vfp.S b/lib/libcompiler_rt/arm/adddf3vfp.S index f4c00a03e05..8e476cad162 100644 --- a/lib/libcompiler_rt/arm/adddf3vfp.S +++ b/lib/libcompiler_rt/arm/adddf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 vadd.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__adddf3vfp) diff --git a/lib/libcompiler_rt/arm/addsf3.S b/lib/libcompiler_rt/arm/addsf3.S new file mode 100644 index 00000000000..362b5c147ea --- /dev/null +++ b/lib/libcompiler_rt/arm/addsf3.S @@ -0,0 +1,277 @@ +/*===-- addsf3.S - Adds two single precision floating pointer numbers-----===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __addsf3 (single precision floating pointer number + * addition with the IEEE-754 default rounding (to nearest, ties to even) + * function for the ARM Thumb1 ISA. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" +#define significandBits 23 +#define typeWidth 32 + + .syntax unified + .text + .thumb + .p2align 2 + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3) + push {r4, r5, r6, r7, lr} + // Get the absolute value of a and b. + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r2, r2, #1 /* aAbs */ + beq LOCAL_LABEL(a_zero_nan_inf) + lsrs r3, r3, #1 /* bAbs */ + beq LOCAL_LABEL(zero_nan_inf) + + // Detect if a or b is infinity or Nan. + lsrs r6, r2, #(significandBits) + lsrs r7, r3, #(significandBits) + cmp r6, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + cmp r7, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + + // Swap Rep and Abs so that a and aAbs has the larger absolute value. + cmp r2, r3 + bhs LOCAL_LABEL(no_swap) + movs r4, r0 + movs r5, r2 + movs r0, r1 + movs r2, r3 + movs r1, r4 + movs r3, r5 +LOCAL_LABEL(no_swap): + + // Get the significands and shift them to give us round, guard and sticky. + lsls r4, r0, #(typeWidth - significandBits) + lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */ + lsls r5, r1, #(typeWidth - significandBits) + lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */ + + // Get the implicitBit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3) + + // Get aExponent and set implicit bit if necessary. + lsrs r2, r2, #(significandBits) + beq LOCAL_LABEL(a_done_implicit_bit) + orrs r4, r6 +LOCAL_LABEL(a_done_implicit_bit): + + // Get bExponent and set implicit bit if necessary. + lsrs r3, r3, #(significandBits) + beq LOCAL_LABEL(b_done_implicit_bit) + orrs r5, r6 +LOCAL_LABEL(b_done_implicit_bit): + + // Get the difference in exponents. + subs r6, r2, r3 + beq LOCAL_LABEL(done_align) + + // If b is denormal, then a must be normal as align > 0, and we only need to + // right shift bSignificand by (align - 1) bits. + cmp r3, #0 + bne 1f + subs r6, r6, #1 +1: + + // No longer needs bExponent. r3 is dead here. + // Set sticky bits of b: sticky = bSignificand << (typeWidth - align). + movs r3, #(typeWidth) + subs r3, r3, r6 + movs r7, r5 + lsls r7, r3 + beq 1f + movs r7, #1 +1: + + // bSignificand = bSignificand >> align | sticky; + lsrs r5, r6 + orrs r5, r7 + bne LOCAL_LABEL(done_align) + movs r5, #1 // sticky; b is known to be non-zero. + +LOCAL_LABEL(done_align): + // isSubtraction = (aRep ^ bRep) >> 31; + movs r7, r0 + eors r7, r1 + lsrs r7, #31 + bne LOCAL_LABEL(do_substraction) + + // Same sign, do Addition. + + // aSignificand += bSignificand; + adds r4, r4, r5 + + // Check carry bit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3 + 1) + movs r7, r4 + ands r7, r6 + beq LOCAL_LABEL(form_result) + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + movs r7, r4 + movs r6, #1 + ands r7, r6 // sticky = aSignificand & 1; + lsrs r4, #1 + orrs r4, r7 // result Significand + adds r2, #1 // result Exponent + // If we have overflowed the type, return +/- infinity. + cmp r2, 0xFF + beq LOCAL_LABEL(ret_inf) + +LOCAL_LABEL(form_result): + // Shift the sign, exponent and significand into place. + lsrs r0, #(typeWidth - 1) + lsls r0, #(typeWidth - 1) // Get Sign. + lsls r2, #(significandBits) + orrs r0, r2 + movs r1, r4 + lsls r4, #(typeWidth - significandBits - 3) + lsrs r4, #(typeWidth - significandBits) + orrs r0, r4 + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + // roundGuardSticky = aSignificand & 0x7; + movs r2, #0x7 + ands r1, r2 + // if (roundGuardSticky > 0x4) result++; + + cmp r1, #0x4 + blt LOCAL_LABEL(done_round) + beq 1f + adds r0, #1 + pop {r4, r5, r6, r7, pc} +1: + + // if (roundGuardSticky == 0x4) result += result & 1; + movs r1, r0 + lsrs r1, #1 + bcc LOCAL_LABEL(done_round) + adds r0, r0, #1 +LOCAL_LABEL(done_round): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(do_substraction): + subs r4, r4, r5 // aSignificand -= bSignificand; + beq LOCAL_LABEL(ret_zero) + movs r6, r4 + cmp r2, 0 + beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize. + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + lsrs r6, r6, #(significandBits + 3) + bne LOCAL_LABEL(form_result) + + push {r0, r1, r2, r3} + movs r0, r4 + bl __clzsi2 + movs r5, r0 + pop {r0, r1, r2, r3} + // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + subs r5, r5, #(typeWidth - significandBits - 3 - 1) + // aSignificand <<= shift; aExponent -= shift; + lsls r4, r5 + subs r2, r2, r5 + bgt LOCAL_LABEL(form_result) + + // Do normalization if aExponent <= 0. + movs r6, #1 + subs r6, r6, r2 // 1 - aExponent; + movs r2, #0 // aExponent = 0; + movs r3, #(typeWidth) // bExponent is dead. + subs r3, r3, r6 + movs r7, r4 + lsls r7, r3 // stickyBit = (bool)(aSignificant << (typeWidth - align)) + beq 1f + movs r7, #1 +1: + lsrs r4, r6 /* aSignificand >> shift */ + orrs r4, r7 + b LOCAL_LABEL(form_result) + +LOCAL_LABEL(ret_zero): + movs r0, #0 + pop {r4, r5, r6, r7, pc} + + +LOCAL_LABEL(a_zero_nan_inf): + lsrs r3, r3, #1 + +LOCAL_LABEL(zero_nan_inf): + // Here r2 has aAbs, r3 has bAbs + movs r4, #0xFF + lsls r4, r4, #(significandBits) // Make +inf. + + cmp r2, r4 + bhi LOCAL_LABEL(a_is_nan) + cmp r3, r4 + bhi LOCAL_LABEL(b_is_nan) + + cmp r2, r4 + bne LOCAL_LABEL(a_is_rational) + // aAbs is INF. + eors r1, r0 // aRep ^ bRep. + movs r6, #1 + lsls r6, r6, #(typeWidth - 1) // get sign mask. + cmp r1, r6 // if they only differ on sign bit, it's -INF + INF + beq LOCAL_LABEL(a_is_nan) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(a_is_rational): + cmp r3, r4 + bne LOCAL_LABEL(b_is_rational) + movs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_rational): + // either a or b or both are zero. + adds r4, r2, r3 + beq LOCAL_LABEL(both_zero) + cmp r2, #0 // is absA 0 ? + beq LOCAL_LABEL(ret_b) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(both_zero): + ands r0, r1 // +0 + -0 = +0 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_b): + movs r0, r1 + +LOCAL_LABEL(ret): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_nan): + movs r0, r1 +LOCAL_LABEL(a_is_nan): + movs r1, #1 + lsls r1, r1, #(significandBits -1) // r1 is quiet bit. + orrs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_inf): + movs r4, #0xFF + lsls r4, r4, #(significandBits) + orrs r0, r4 + lsrs r0, r0, #(significandBits) + lsls r0, r0, #(significandBits) + pop {r4, r5, r6, r7, pc} + + +END_COMPILERRT_FUNCTION(__addsf3) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/addsf3vfp.S b/lib/libcompiler_rt/arm/addsf3vfp.S index af40c1cc92a..8871efdcc5d 100644 --- a/lib/libcompiler_rt/arm/addsf3vfp.S +++ b/lib/libcompiler_rt/arm/addsf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vadd.f32 s14, s14, s15 vmov r0, s14 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__addsf3vfp) diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmp.S b/lib/libcompiler_rt/arm/aeabi_cdcmp.S index 8008f5fca26..3e7a8b86b73 100644 --- a/lib/libcompiler_rt/arm/aeabi_cdcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_cdcmp.S @@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) push {r0-r3, lr} bl __aeabi_cdcmpeq_check_nan cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + // NaN has been ruled out, so __aeabi_cdcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cdcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else pop {r0-r3, lr} // NaN has been ruled out, so __aeabi_cdcmple can't trap bne __aeabi_cdcmple +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else msr CPSR_f, #APSR_C +#endif JMP(lr) +#endif END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) @@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) bl __aeabi_dcmplt cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) moveq ip, #0 beq 1f ldm sp, {r0-r3} bl __aeabi_dcmpeq cmp r0, #1 + ITE(eq) moveq ip, #(APSR_C | APSR_Z) movne ip, #(APSR_C) 1: +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + msr APSR_nzcvq, ip +#else msr CPSR_f, ip +#endif pop {r0-r3} POP_PC() +#endif END_COMPILERRT_FUNCTION(__aeabi_cdcmple) // int __aeabi_cdrcmple(double a, double b) { diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c index 577f6b2c553..7578433a1df 100644 --- a/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c +++ b/lib/libcompiler_rt/arm/aeabi_cdcmpeq_check_nan.c @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include +#include "../int_lib.h" -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) +AEABI_RTABI __attribute__((visibility("hidden"))) int __aeabi_cdcmpeq_check_nan(double a, double b) { return __builtin_isnan(a) || __builtin_isnan(b); } diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmp.S b/lib/libcompiler_rt/arm/aeabi_cfcmp.S index 274baf7aecf..1f304ffd964 100644 --- a/lib/libcompiler_rt/arm/aeabi_cfcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_cfcmp.S @@ -30,13 +30,32 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) push {r0-r3, lr} bl __aeabi_cfcmpeq_check_nan cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + // NaN has been ruled out, so __aeabi_cfcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cfcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else pop {r0-r3, lr} // NaN has been ruled out, so __aeabi_cfcmple can't trap bne __aeabi_cfcmple +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else msr CPSR_f, #APSR_C +#endif JMP(lr) +#endif END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) @@ -59,19 +78,48 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) bl __aeabi_fcmplt cmp r0, #1 +#if __ARM_ARCH_ISA_THUMB == 1 + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) moveq ip, #0 beq 1f ldm sp, {r0-r3} bl __aeabi_fcmpeq cmp r0, #1 + ITE(eq) moveq ip, #(APSR_C | APSR_Z) movne ip, #(APSR_C) 1: +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + msr APSR_nzcvq, ip +#else msr CPSR_f, ip +#endif pop {r0-r3} POP_PC() +#endif END_COMPILERRT_FUNCTION(__aeabi_cfcmple) // int __aeabi_cfrcmple(float a, float b) { diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c index 992e31fbd8d..43dde9a4959 100644 --- a/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c +++ b/lib/libcompiler_rt/arm/aeabi_cfcmpeq_check_nan.c @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include +#include "../int_lib.h" -__attribute__((pcs("aapcs"))) -__attribute__((visibility("hidden"))) +AEABI_RTABI __attribute__((visibility("hidden"))) int __aeabi_cfcmpeq_check_nan(float a, float b) { return __builtin_isnan(a) || __builtin_isnan(b); } diff --git a/lib/libcompiler_rt/arm/aeabi_dcmp.S b/lib/libcompiler_rt/arm/aeabi_dcmp.S index 43e439268d9..9fa78b46124 100644 --- a/lib/libcompiler_rt/arm/aeabi_dcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_dcmp.S @@ -18,18 +18,27 @@ // } // } +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \ + vmov d0, r0, r1 SEPARATOR \ + vmov d1, r2, r3 +#else +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS +#endif + #define DEFINE_AEABI_DCMP(cond) \ .syntax unified SEPARATOR \ .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ push { r4, lr } SEPARATOR \ + CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ + movs r0, #0 SEPARATOR \ pop { r4, pc } SEPARATOR \ 1: SEPARATOR \ - mov r0, #1 SEPARATOR \ + movs r0, #1 SEPARATOR \ pop { r4, pc } SEPARATOR \ END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) diff --git a/lib/libcompiler_rt/arm/aeabi_div0.c b/lib/libcompiler_rt/arm/aeabi_div0.c index ccc95fa5c12..dc3031326e3 100644 --- a/lib/libcompiler_rt/arm/aeabi_div0.c +++ b/lib/libcompiler_rt/arm/aeabi_div0.c @@ -26,16 +26,18 @@ * line. */ +#include "../int_lib.h" + /* provide an unused declaration to pacify pendantic compilation */ extern unsigned char declaration; #if defined(__ARM_EABI__) -int __attribute__((weak)) __attribute__((visibility("hidden"))) +AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden"))) __aeabi_idiv0(int return_value) { return return_value; } -long long __attribute__((weak)) __attribute__((visibility("hidden"))) +AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden"))) __aeabi_ldiv0(long long return_value) { return return_value; } diff --git a/lib/libcompiler_rt/arm/aeabi_drsub.c b/lib/libcompiler_rt/arm/aeabi_drsub.c index fc17d5a4cc7..1254886086f 100644 --- a/lib/libcompiler_rt/arm/aeabi_drsub.c +++ b/lib/libcompiler_rt/arm/aeabi_drsub.c @@ -10,10 +10,10 @@ #define DOUBLE_PRECISION #include "../fp_lib.h" -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } diff --git a/lib/libcompiler_rt/arm/aeabi_fcmp.S b/lib/libcompiler_rt/arm/aeabi_fcmp.S index 0a1d92a60b6..ea5b96c21d5 100644 --- a/lib/libcompiler_rt/arm/aeabi_fcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_fcmp.S @@ -18,18 +18,27 @@ // } // } +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \ + vmov s0, r0 SEPARATOR \ + vmov s1, r1 +#else +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS +#endif + #define DEFINE_AEABI_FCMP(cond) \ .syntax unified SEPARATOR \ .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ push { r4, lr } SEPARATOR \ + CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ - mov r0, #0 SEPARATOR \ + movs r0, #0 SEPARATOR \ pop { r4, pc } SEPARATOR \ 1: SEPARATOR \ - mov r0, #1 SEPARATOR \ + movs r0, #1 SEPARATOR \ pop { r4, pc } SEPARATOR \ END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) diff --git a/lib/libcompiler_rt/arm/aeabi_frsub.c b/lib/libcompiler_rt/arm/aeabi_frsub.c index 64258dc7e07..34f2303745b 100644 --- a/lib/libcompiler_rt/arm/aeabi_frsub.c +++ b/lib/libcompiler_rt/arm/aeabi_frsub.c @@ -10,10 +10,10 @@ #define SINGLE_PRECISION #include "../fp_lib.h" -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); -COMPILER_RT_ABI fp_t +AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } diff --git a/lib/libcompiler_rt/arm/aeabi_idivmod.S b/lib/libcompiler_rt/arm/aeabi_idivmod.S index 2fcad862f73..0164b15dca1 100644 --- a/lib/libcompiler_rt/arm/aeabi_idivmod.S +++ b/lib/libcompiler_rt/arm/aeabi_idivmod.S @@ -15,16 +15,34 @@ // return {quot, rem}; // } +#if defined(__MINGW32__) +#define __aeabi_idivmod __rt_sdiv +#endif + .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) +#if __ARM_ARCH_ISA_THUMB == 1 + push {r0, r1, lr} + bl SYMBOL_NAME(__divsi3) + pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +#else push { lr } sub sp, sp, #4 mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif bl SYMBOL_NAME(__divmodsi4) ldr r1, [sp] add sp, sp, #4 pop { pc } +#endif // __ARM_ARCH_ISA_THUMB == 1 END_COMPILERRT_FUNCTION(__aeabi_idivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/aeabi_ldivmod.S b/lib/libcompiler_rt/arm/aeabi_ldivmod.S index 9f161f3007f..038ae5d723a 100644 --- a/lib/libcompiler_rt/arm/aeabi_ldivmod.S +++ b/lib/libcompiler_rt/arm/aeabi_ldivmod.S @@ -16,18 +16,30 @@ // return {quot, rem}; // } +#if defined(__MINGW32__) +#define __aeabi_ldivmod __rt_sdiv64 +#endif + .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) - push {r11, lr} + push {r6, lr} sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif bl SYMBOL_NAME(__divmoddi4) ldr r2, [sp, #8] ldr r3, [sp, #12] add sp, sp, #16 - pop {r11, pc} + pop {r6, pc} END_COMPILERRT_FUNCTION(__aeabi_ldivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/aeabi_memset.S b/lib/libcompiler_rt/arm/aeabi_memset.S index 48edd89705b..633f592279b 100644 --- a/lib/libcompiler_rt/arm/aeabi_memset.S +++ b/lib/libcompiler_rt/arm/aeabi_memset.S @@ -26,7 +26,7 @@ DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) mov r2, r1 - mov r1, #0 + movs r1, #0 b memset END_COMPILERRT_FUNCTION(__aeabi_memclr) diff --git a/lib/libcompiler_rt/arm/aeabi_uidivmod.S b/lib/libcompiler_rt/arm/aeabi_uidivmod.S index e1e12d97aa0..a627fc740a0 100644 --- a/lib/libcompiler_rt/arm/aeabi_uidivmod.S +++ b/lib/libcompiler_rt/arm/aeabi_uidivmod.S @@ -16,16 +16,40 @@ // return {quot, rem}; // } +#if defined(__MINGW32__) +#define __aeabi_uidivmod __rt_udiv +#endif + .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) +#if __ARM_ARCH_ISA_THUMB == 1 + cmp r0, r1 + bcc LOCAL_LABEL(case_denom_larger) + push {r0, r1, lr} + bl SYMBOL_NAME(__aeabi_uidiv) + pop {r1, r2, r3} + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +LOCAL_LABEL(case_denom_larger): + movs r1, r0 + movs r0, #0 + JMP (lr) +#else push { lr } sub sp, sp, #4 mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif bl SYMBOL_NAME(__udivmodsi4) ldr r1, [sp] add sp, sp, #4 pop { pc } +#endif END_COMPILERRT_FUNCTION(__aeabi_uidivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/aeabi_uldivmod.S b/lib/libcompiler_rt/arm/aeabi_uldivmod.S index e8aaef282e9..be343b6bc82 100644 --- a/lib/libcompiler_rt/arm/aeabi_uldivmod.S +++ b/lib/libcompiler_rt/arm/aeabi_uldivmod.S @@ -16,18 +16,30 @@ // return {quot, rem}; // } +#if defined(__MINGW32__) +#define __aeabi_uldivmod __rt_udiv64 +#endif + .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) - push {r11, lr} + push {r6, lr} sub sp, sp, #16 - add r12, sp, #8 - str r12, [sp] + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif bl SYMBOL_NAME(__udivmoddi4) ldr r2, [sp, #8] ldr r3, [sp, #12] add sp, sp, #16 - pop {r11, pc} + pop {r6, pc} END_COMPILERRT_FUNCTION(__aeabi_uldivmod) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/comparesf2.S b/lib/libcompiler_rt/arm/comparesf2.S index 52597b673f9..ef7091bf3c8 100644 --- a/lib/libcompiler_rt/arm/comparesf2.S +++ b/lib/libcompiler_rt/arm/comparesf2.S @@ -39,32 +39,64 @@ #include "../assembly.h" .syntax unified +#if __ARM_ARCH_ISA_THUMB == 2 +.thumb +#endif -.p2align 2 +@ int __eqsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif // Make copies of a and b with the sign bit shifted off the top. These will // be used to detect zeros and NaNs. +#if __ARM_ARCH_ISA_THUMB == 1 + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 +#else mov r2, r0, lsl #1 mov r3, r1, lsl #1 +#endif // We do the comparison in three stages (ignoring NaN values for the time // being). First, we orr the absolute values of a and b; this sets the Z // flag if both a and b are zero (of either sign). The shift of r3 doesn't // effect this at all, but it *does* make sure that the C flag is clear for // the subsequent operations. +#if __ARM_ARCH_ISA_THUMB == 1 + lsrs r6, r3, #1 + orrs r6, r2 +#else orrs r12, r2, r3, lsr #1 - +#endif // Next, we check if a and b have the same or different signs. If they have // opposite signs, this eor will set the N flag. +#if __ARM_ARCH_ISA_THUMB == 1 + beq 1f + movs r6, r0 + eors r6, r1 +1: +#else it ne eorsne r12, r0, r1 +#endif // If a and b are equal (either both zeros or bit identical; again, we're // ignoring NaNs for now), this subtract will zero out r0. If they have the // same sign, the flags are updated as they would be for a comparison of the // absolute values of a and b. +#if __ARM_ARCH_ISA_THUMB == 1 + bmi 1f + subs r0, r2, r3 +1: +#else it pl subspl r0, r2, r3 +#endif // If a is smaller in magnitude than b and both have the same sign, place // the negation of the sign of b in r0. Thus, if both are negative and @@ -76,41 +108,126 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2) // still clear from the shift argument in orrs; if a is positive and b // negative, this places 0 in r0; if a is negative and b positive, -1 is // placed in r0. +#if __ARM_ARCH_ISA_THUMB == 1 + bhs 1f + // Here if a and b have the same sign and absA < absB, the result is thus + // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan). + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN) +1: +#else it lo mvnlo r0, r1, asr #31 +#endif // If a is greater in magnitude than b and both have the same sign, place // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed // in r0, which is the desired result. Conversely, if both are positive // and a > b, zero is placed in r0. +#if __ARM_ARCH_ISA_THUMB == 1 + bls 1f + // Here both have the same sign and absA > absB. + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN) + negs r0, r0 +1: +#else it hi movhi r0, r1, asr #31 +#endif // If you've been keeping track, at this point r0 contains -1 if a < b and // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. // If a == b, then the Z flag is set, so we can get the correct final value // into r0 by simply or'ing with 1 if Z is clear. + // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b. +#if __ARM_ARCH_ISA_THUMB != 1 it ne orrne r0, r0, #1 +#endif // Finally, we need to deal with NaNs. If either argument is NaN, replace // the value in r0 with 1. +#if __ARM_ARCH_ISA_THUMB == 1 +LOCAL_LABEL(CHECK_NAN): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 1f + cmp r3, r6 +1: + bls 2f + movs r0, #1 +2: + pop {r6, pc} +#else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 JMP(lr) +#endif END_COMPILERRT_FUNCTION(__eqsf2) + DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) -.p2align 2 +@ int __gtsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2) // Identical to the preceding except in that we return -1 for NaN values. - // Given that the two paths share so much code, one might be tempted to + // Given that the two paths share so much code, one might be tempted to // unify them; however, the extra code needed to do so makes the code size // to performance tradeoff very hard to justify for such small functions. +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif +#if __ARM_ARCH_ISA_THUMB == 1 + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r6, r3, #1 + orrs r6, r2 + beq 1f + movs r6, r0 + eors r6, r1 +1: + bmi 2f + subs r0, r2, r3 +2: + bhs 3f + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN_2) +3: + bls 4f + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN_2) + negs r0, r0 +4: +LOCAL_LABEL(CHECK_NAN_2): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 5f + cmp r3, r6 +5: + bls 6f + movs r0, #1 + negs r0, r0 +6: + pop {r6, pc} +#else mov r2, r0, lsl #1 mov r3, r1, lsl #1 orrs r12, r2, r3, lsr #1 @@ -129,23 +246,51 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2) cmpls r3, #0xff000000 movhi r0, #-1 JMP(lr) +#endif END_COMPILERRT_FUNCTION(__gtsf2) + DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) -.p2align 2 +@ int __unordsf2(float a, float b) + + .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif // Return 1 for NaN values, 0 otherwise. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mov r0, #0 + lsls r2, r0, #1 + lsls r3, r1, #1 + movs r0, #0 +#if __ARM_ARCH_ISA_THUMB == 1 + movs r1, #0xff + lsls r1, #24 + cmp r2, r1 + bhi 1f + cmp r3, r1 +1: + bls 2f + movs r0, #1 +2: +#else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 +#endif JMP(lr) END_COMPILERRT_FUNCTION(__unordsf2) +#if defined(COMPILER_RT_ARMHF_TARGET) +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) + vmov s0, r0 + vmov s1, r1 + b SYMBOL_NAME(__unordsf2) +END_COMPILERRT_FUNCTION(__aeabi_fcmpum) +#else DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) +#endif NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/divdf3vfp.S b/lib/libcompiler_rt/arm/divdf3vfp.S index 928f53809f1..776ba4f24b4 100644 --- a/lib/libcompiler_rt/arm/divdf3vfp.S +++ b/lib/libcompiler_rt/arm/divdf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vdiv.f64 d5, d6, d7 + vdiv.f64 d5, d6, d7 vmov r0, r1, d5 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__divdf3vfp) diff --git a/lib/libcompiler_rt/arm/divsf3vfp.S b/lib/libcompiler_rt/arm/divsf3vfp.S index a2e297f7015..130318f0c37 100644 --- a/lib/libcompiler_rt/arm/divsf3vfp.S +++ b/lib/libcompiler_rt/arm/divsf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__divsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vdiv.f32 s13, s14, s15 vmov r0, s13 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__divsf3vfp) diff --git a/lib/libcompiler_rt/arm/divsi3.S b/lib/libcompiler_rt/arm/divsi3.S index 7e23ba4fc23..f066f60ad96 100644 --- a/lib/libcompiler_rt/arm/divsi3.S +++ b/lib/libcompiler_rt/arm/divsi3.S @@ -49,17 +49,37 @@ LOCAL_LABEL(divzero): #else ESTABLISH_FRAME // Set aside the sign of the quotient. +# if __ARM_ARCH_ISA_THUMB == 1 + movs r4, r0 + eors r4, r1 +# else eor r4, r0, r1 +# endif // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). +# if __ARM_ARCH_ISA_THUMB == 1 + asrs r2, r0, #31 + asrs r3, r1, #31 + eors r0, r2 + eors r1, r3 + subs r0, r0, r2 + subs r1, r1, r3 +# else eor r2, r0, r0, asr #31 eor r3, r1, r1, asr #31 sub r0, r2, r0, asr #31 sub r1, r3, r1, asr #31 +# endif // abs(a) / abs(b) bl SYMBOL_NAME(__udivsi3) // Apply sign of quotient to result and return. +# if __ARM_ARCH_ISA_THUMB == 1 + asrs r4, #31 + eors r0, r4 + subs r0, r0, r4 +# else eor r0, r0, r4, asr #31 sub r0, r0, r4, asr #31 +# endif CLEAR_FRAME_AND_RETURN #endif END_COMPILERRT_FUNCTION(__divsi3) diff --git a/lib/libcompiler_rt/arm/eqdf2vfp.S b/lib/libcompiler_rt/arm/eqdf2vfp.S index 95e6bb36334..d5070657091 100644 --- a/lib/libcompiler_rt/arm/eqdf2vfp.S +++ b/lib/libcompiler_rt/arm/eqdf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(eq) moveq r0, #1 // set result register to 1 if equal movne r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/eqsf2vfp.S b/lib/libcompiler_rt/arm/eqsf2vfp.S index fbac139c193..fd72b2fdbde 100644 --- a/lib/libcompiler_rt/arm/eqsf2vfp.S +++ b/lib/libcompiler_rt/arm/eqsf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(eq) moveq r0, #1 // set result register to 1 if equal movne r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/extendsfdf2vfp.S b/lib/libcompiler_rt/arm/extendsfdf2vfp.S index 563bf92afc3..1079f977bae 100644 --- a/lib/libcompiler_rt/arm/extendsfdf2vfp.S +++ b/lib/libcompiler_rt/arm/extendsfdf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f64.f32 d0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.f64.f32 d7, s15 // convert single to double vmov r0, r1, d7 // return result in r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__extendsfdf2vfp) diff --git a/lib/libcompiler_rt/arm/fixdfsivfp.S b/lib/libcompiler_rt/arm/fixdfsivfp.S index 8263ff942f8..5d7b0f85654 100644 --- a/lib/libcompiler_rt/arm/fixdfsivfp.S +++ b/lib/libcompiler_rt/arm/fixdfsivfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f64 s0, d0 + vmov r0, s0 +#else vmov d7, r0, r1 // load double register from R0/R1 vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixdfsivfp) diff --git a/lib/libcompiler_rt/arm/fixsfsivfp.S b/lib/libcompiler_rt/arm/fixsfsivfp.S index c7c3b811787..805a277afa3 100644 --- a/lib/libcompiler_rt/arm/fixsfsivfp.S +++ b/lib/libcompiler_rt/arm/fixsfsivfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.s32.f32 s0, s0 + vmov r0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixsfsivfp) diff --git a/lib/libcompiler_rt/arm/fixunsdfsivfp.S b/lib/libcompiler_rt/arm/fixunsdfsivfp.S index 9cc1e628699..4f1b2c8cefd 100644 --- a/lib/libcompiler_rt/arm/fixunsdfsivfp.S +++ b/lib/libcompiler_rt/arm/fixunsdfsivfp.S @@ -20,9 +20,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f64 s0, d0 + vmov r0, s0 +#else vmov d7, r0, r1 // load double register from R0/R1 vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixunsdfsivfp) diff --git a/lib/libcompiler_rt/arm/fixunssfsivfp.S b/lib/libcompiler_rt/arm/fixunssfsivfp.S index 79d70822911..e5d77823687 100644 --- a/lib/libcompiler_rt/arm/fixunssfsivfp.S +++ b/lib/libcompiler_rt/arm/fixunssfsivfp.S @@ -20,9 +20,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.u32.f32 s0, s0 + vmov r0, s0 +#else vmov s15, r0 // load float register from R0 vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__fixunssfsivfp) diff --git a/lib/libcompiler_rt/arm/floatsidfvfp.S b/lib/libcompiler_rt/arm/floatsidfvfp.S index 7623f26c6e6..3297ad44d8c 100644 --- a/lib/libcompiler_rt/arm/floatsidfvfp.S +++ b/lib/libcompiler_rt/arm/floatsidfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.s32 d0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7 vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif bx lr END_COMPILERRT_FUNCTION(__floatsidfvfp) diff --git a/lib/libcompiler_rt/arm/floatsisfvfp.S b/lib/libcompiler_rt/arm/floatsisfvfp.S index c73dfac13eb..65408b54b8d 100644 --- a/lib/libcompiler_rt/arm/floatsisfvfp.S +++ b/lib/libcompiler_rt/arm/floatsisfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.s32 s0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__floatsisfvfp) diff --git a/lib/libcompiler_rt/arm/floatunssidfvfp.S b/lib/libcompiler_rt/arm/floatunssidfvfp.S index 2a59fdb830b..d7a7024a25b 100644 --- a/lib/libcompiler_rt/arm/floatunssidfvfp.S +++ b/lib/libcompiler_rt/arm/floatunssidfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f64.u32 d0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7 vmov r0, r1, d7 // move d7 to result register pair r0/r1 +#endif bx lr END_COMPILERRT_FUNCTION(__floatunssidfvfp) diff --git a/lib/libcompiler_rt/arm/floatunssisfvfp.S b/lib/libcompiler_rt/arm/floatunssisfvfp.S index c096263c1bc..1ca856519a9 100644 --- a/lib/libcompiler_rt/arm/floatunssisfvfp.S +++ b/lib/libcompiler_rt/arm/floatunssisfvfp.S @@ -19,9 +19,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov s0, r0 + vcvt.f32.u32 s0, s0 +#else vmov s15, r0 // move int to float register s15 vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15 vmov r0, s15 // move s15 to result register +#endif bx lr END_COMPILERRT_FUNCTION(__floatunssisfvfp) diff --git a/lib/libcompiler_rt/arm/gedf2vfp.S b/lib/libcompiler_rt/arm/gedf2vfp.S index 72f13ef4e71..364fc5b24cd 100644 --- a/lib/libcompiler_rt/arm/gedf2vfp.S +++ b/lib/libcompiler_rt/arm/gedf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(ge) movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/gesf2vfp.S b/lib/libcompiler_rt/arm/gesf2vfp.S index c9ee52c9c44..346c3473ae4 100644 --- a/lib/libcompiler_rt/arm/gesf2vfp.S +++ b/lib/libcompiler_rt/arm/gesf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(ge) movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/gtdf2vfp.S b/lib/libcompiler_rt/arm/gtdf2vfp.S index c7f277552fa..3389c3ad973 100644 --- a/lib/libcompiler_rt/arm/gtdf2vfp.S +++ b/lib/libcompiler_rt/arm/gtdf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(gt) movgt r0, #1 // set result register to 1 if equal movle r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/gtsf2vfp.S b/lib/libcompiler_rt/arm/gtsf2vfp.S index 7d49e4564a8..afdba8b018e 100644 --- a/lib/libcompiler_rt/arm/gtsf2vfp.S +++ b/lib/libcompiler_rt/arm/gtsf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(gt) movgt r0, #1 // set result register to 1 if equal movle r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/ledf2vfp.S b/lib/libcompiler_rt/arm/ledf2vfp.S index ca5b553f115..4bbe4c86837 100644 --- a/lib/libcompiler_rt/arm/ledf2vfp.S +++ b/lib/libcompiler_rt/arm/ledf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(ls) movls r0, #1 // set result register to 1 if equal movhi r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/lesf2vfp.S b/lib/libcompiler_rt/arm/lesf2vfp.S index f25422ece8f..51232bd8ced 100644 --- a/lib/libcompiler_rt/arm/lesf2vfp.S +++ b/lib/libcompiler_rt/arm/lesf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(ls) movls r0, #1 // set result register to 1 if equal movhi r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/ltdf2vfp.S b/lib/libcompiler_rt/arm/ltdf2vfp.S index 6e2c0997c01..8e2928c813d 100644 --- a/lib/libcompiler_rt/arm/ltdf2vfp.S +++ b/lib/libcompiler_rt/arm/ltdf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(mi) movmi r0, #1 // set result register to 1 if equal movpl r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/ltsf2vfp.S b/lib/libcompiler_rt/arm/ltsf2vfp.S index 95febb60672..59c00c6bab6 100644 --- a/lib/libcompiler_rt/arm/ltsf2vfp.S +++ b/lib/libcompiler_rt/arm/ltsf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(mi) movmi r0, #1 // set result register to 1 if equal movpl r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/muldf3vfp.S b/lib/libcompiler_rt/arm/muldf3vfp.S index f638de1ad28..aa7b2349503 100644 --- a/lib/libcompiler_rt/arm/muldf3vfp.S +++ b/lib/libcompiler_rt/arm/muldf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 - vmul.f64 d6, d6, d7 + vmul.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__muldf3vfp) diff --git a/lib/libcompiler_rt/arm/mulsf3vfp.S b/lib/libcompiler_rt/arm/mulsf3vfp.S index bef58d3a0c8..a1da789dcad 100644 --- a/lib/libcompiler_rt/arm/mulsf3vfp.S +++ b/lib/libcompiler_rt/arm/mulsf3vfp.S @@ -18,9 +18,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vmul.f32 s13, s14, s15 +#endif vmov r0, s13 // move result back to r0 bx lr END_COMPILERRT_FUNCTION(__mulsf3vfp) diff --git a/lib/libcompiler_rt/arm/nedf2vfp.S b/lib/libcompiler_rt/arm/nedf2vfp.S index 78cf529d665..aef72eb0097 100644 --- a/lib/libcompiler_rt/arm/nedf2vfp.S +++ b/lib/libcompiler_rt/arm/nedf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(ne) movne r0, #1 // set result register to 0 if unequal moveq r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/negdf2vfp.S b/lib/libcompiler_rt/arm/negdf2vfp.S index 01c8ba6a120..81f0ab8eec1 100644 --- a/lib/libcompiler_rt/arm/negdf2vfp.S +++ b/lib/libcompiler_rt/arm/negdf2vfp.S @@ -18,7 +18,11 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f64 d0, d0 +#else eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__negdf2vfp) diff --git a/lib/libcompiler_rt/arm/negsf2vfp.S b/lib/libcompiler_rt/arm/negsf2vfp.S index 797abb32ead..46ab4a9cf16 100644 --- a/lib/libcompiler_rt/arm/negsf2vfp.S +++ b/lib/libcompiler_rt/arm/negsf2vfp.S @@ -18,7 +18,11 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f32 s0, s0 +#else eor r0, r0, #-2147483648 // flip sign bit on float in r0 +#endif bx lr END_COMPILERRT_FUNCTION(__negsf2vfp) diff --git a/lib/libcompiler_rt/arm/nesf2vfp.S b/lib/libcompiler_rt/arm/nesf2vfp.S index 554d3e46751..50d60f49300 100644 --- a/lib/libcompiler_rt/arm/nesf2vfp.S +++ b/lib/libcompiler_rt/arm/nesf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(ne) movne r0, #1 // set result register to 1 if unequal moveq r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/subdf3vfp.S b/lib/libcompiler_rt/arm/subdf3vfp.S index 1fc7d18c3d3..2b6f2bdbfdd 100644 --- a/lib/libcompiler_rt/arm/subdf3vfp.S +++ b/lib/libcompiler_rt/arm/subdf3vfp.S @@ -18,10 +18,14 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f64 d0, d0, d1 +#else vmov d6, r0, r1 // move first param from r0/r1 pair into d6 vmov d7, r2, r3 // move second param from r2/r3 pair into d7 vsub.f64 d6, d6, d7 vmov r0, r1, d6 // move result back to r0/r1 pair +#endif bx lr END_COMPILERRT_FUNCTION(__subdf3vfp) diff --git a/lib/libcompiler_rt/arm/subsf3vfp.S b/lib/libcompiler_rt/arm/subsf3vfp.S index 11fe386cd0d..3e83ea26507 100644 --- a/lib/libcompiler_rt/arm/subsf3vfp.S +++ b/lib/libcompiler_rt/arm/subsf3vfp.S @@ -12,17 +12,21 @@ // // extern float __subsf3vfp(float a, float b); // -// Returns the difference between two single precision floating point numbers +// Returns the difference between two single precision floating point numbers // using the Darwin calling convention where single arguments are passsed // like 32-bit ints. // .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f32 s0, s0, s1 +#else vmov s14, r0 // move first param from r0 into float register vmov s15, r1 // move second param from r1 into float register vsub.f32 s14, s14, s15 vmov r0, s14 // move result back to r0 +#endif bx lr END_COMPILERRT_FUNCTION(__subsf3vfp) diff --git a/lib/libcompiler_rt/arm/truncdfsf2vfp.S b/lib/libcompiler_rt/arm/truncdfsf2vfp.S index 04287ad27ce..682e54d3d29 100644 --- a/lib/libcompiler_rt/arm/truncdfsf2vfp.S +++ b/lib/libcompiler_rt/arm/truncdfsf2vfp.S @@ -19,9 +19,13 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f32.f64 s0, d0 +#else vmov d7, r0, r1 // load double from r0/r1 pair vcvt.f32.f64 s15, d7 // convert double to single (trucate precision) vmov r0, s15 // return result in r0 +#endif bx lr END_COMPILERRT_FUNCTION(__truncdfsf2vfp) diff --git a/lib/libcompiler_rt/arm/udivsi3.S b/lib/libcompiler_rt/arm/udivsi3.S index 085f8fb9e2d..b97b3080bff 100644 --- a/lib/libcompiler_rt/arm/udivsi3.S +++ b/lib/libcompiler_rt/arm/udivsi3.S @@ -37,15 +37,38 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) beq LOCAL_LABEL(divby0) udiv r0, r0, r1 bx lr -#else + +LOCAL_LABEL(divby0): + mov r0, #0 +# ifdef __ARM_EABI__ + b __aeabi_idiv0 +# else + JMP(lr) +# endif + +#else /* ! __ARM_ARCH_EXT_IDIV__ */ cmp r1, #1 bcc LOCAL_LABEL(divby0) +#if __ARM_ARCH_ISA_THUMB == 1 + bne LOCAL_LABEL(num_neq_denom) + JMP(lr) +LOCAL_LABEL(num_neq_denom): +#else IT(eq) JMPc(lr, eq) +#endif cmp r0, r1 +#if __ARM_ARCH_ISA_THUMB == 1 + bhs LOCAL_LABEL(num_ge_denom) + movs r0, #0 + JMP(lr) +LOCAL_LABEL(num_ge_denom): +#else ITT(cc) movcc r0, #0 JMPc(lr, cc) +#endif + /* * Implement division using binary long division algorithm. * @@ -62,7 +85,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) * that (r0 << shift) < 2 * r1. The quotient is stored in r3. */ -# ifdef __ARM_FEATURE_CLZ +# if defined(__ARM_FEATURE_CLZ) clz ip, r0 clz r3, r1 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ @@ -77,49 +100,131 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3) sub ip, ip, r3, lsl #3 mov r3, #0 bx ip -# else +# else /* No CLZ Feature */ # if __ARM_ARCH_ISA_THUMB == 2 # error THUMB mode requires CLZ or UDIV # endif +# if __ARM_ARCH_ISA_THUMB == 1 +# define BLOCK_SIZE 10 +# else +# define BLOCK_SIZE 12 +# endif + mov r2, r0 +# if __ARM_ARCH_ISA_THUMB == 1 + mov ip, r0 + adr r0, LOCAL_LABEL(div0block) + adds r0, #1 +# else adr ip, LOCAL_LABEL(div0block) - - lsr r3, r2, #16 +# endif + lsrs r3, r2, #16 cmp r3, r1 +# if __ARM_ARCH_ISA_THUMB == 1 + blo LOCAL_LABEL(skip_16) + movs r2, r3 + subs r0, r0, #(16 * BLOCK_SIZE) +LOCAL_LABEL(skip_16): +# else movhs r2, r3 - subhs ip, ip, #(16 * 12) + subhs ip, ip, #(16 * BLOCK_SIZE) +# endif - lsr r3, r2, #8 + lsrs r3, r2, #8 cmp r3, r1 +# if __ARM_ARCH_ISA_THUMB == 1 + blo LOCAL_LABEL(skip_8) + movs r2, r3 + subs r0, r0, #(8 * BLOCK_SIZE) +LOCAL_LABEL(skip_8): +# else movhs r2, r3 - subhs ip, ip, #(8 * 12) + subhs ip, ip, #(8 * BLOCK_SIZE) +# endif - lsr r3, r2, #4 + lsrs r3, r2, #4 cmp r3, r1 +# if __ARM_ARCH_ISA_THUMB == 1 + blo LOCAL_LABEL(skip_4) + movs r2, r3 + subs r0, r0, #(4 * BLOCK_SIZE) +LOCAL_LABEL(skip_4): +# else movhs r2, r3 - subhs ip, #(4 * 12) + subhs ip, #(4 * BLOCK_SIZE) +# endif - lsr r3, r2, #2 + lsrs r3, r2, #2 cmp r3, r1 +# if __ARM_ARCH_ISA_THUMB == 1 + blo LOCAL_LABEL(skip_2) + movs r2, r3 + subs r0, r0, #(2 * BLOCK_SIZE) +LOCAL_LABEL(skip_2): +# else movhs r2, r3 - subhs ip, ip, #(2 * 12) + subhs ip, ip, #(2 * BLOCK_SIZE) +# endif /* Last block, no need to update r2 or r3. */ +# if __ARM_ARCH_ISA_THUMB == 1 + lsrs r3, r2, #1 + cmp r3, r1 + blo LOCAL_LABEL(skip_1) + subs r0, r0, #(1 * BLOCK_SIZE) +LOCAL_LABEL(skip_1): + movs r2, r0 + mov r0, ip + movs r3, #0 + JMP (r2) + +# else cmp r1, r2, lsr #1 - subls ip, ip, #(1 * 12) + subls ip, ip, #(1 * BLOCK_SIZE) - mov r3, #0 + movs r3, #0 JMP(ip) -# endif +# endif +# endif /* __ARM_FEATURE_CLZ */ + #define IMM # + /* due to the range limit of branch in Thumb1, we have to place the + block closer */ +LOCAL_LABEL(divby0): + movs r0, #0 +# if defined(__ARM_EABI__) + push {r7, lr} + bl __aeabi_idiv0 // due to relocation limit, can't use b. + pop {r7, pc} +# else + JMP(lr) +# endif + +#if __ARM_ARCH_ISA_THUMB == 1 +#define block(shift) \ + lsls r2, r1, IMM shift; \ + cmp r0, r2; \ + blo LOCAL_LABEL(block_skip_##shift); \ + subs r0, r0, r2; \ + LOCAL_LABEL(block_skip_##shift) :; \ + adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */ + + /* TODO: if current location counter is not not word aligned, we don't + need the .p2align and nop */ + /* Label div0block must be word-aligned. First align block 31 */ + .p2align 2 + nop /* Padding to align div0block as 31 blocks = 310 bytes */ + +#else #define block(shift) \ cmp r0, r1, lsl IMM shift; \ ITT(hs); \ WIDE(addhs) r3, r3, IMM (1 << shift); \ WIDE(subhs) r0, r0, r1, lsl IMM shift +#endif block(31) block(30) @@ -159,14 +264,6 @@ LOCAL_LABEL(div0block): JMP(lr) #endif /* __ARM_ARCH_EXT_IDIV__ */ -LOCAL_LABEL(divby0): - mov r0, #0 -#ifdef __ARM_EABI__ - b __aeabi_idiv0 -#else - JMP(lr) -#endif - END_COMPILERRT_FUNCTION(__udivsi3) NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libcompiler_rt/arm/unorddf2vfp.S b/lib/libcompiler_rt/arm/unorddf2vfp.S index 022dd7a978a..6625fa8a311 100644 --- a/lib/libcompiler_rt/arm/unorddf2vfp.S +++ b/lib/libcompiler_rt/arm/unorddf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else vmov d6, r0, r1 // load r0/r1 pair in double register vmov d7, r2, r3 // load r2/r3 pair in double register - vcmp.f64 d6, d7 + vcmp.f64 d6, d7 +#endif vmrs apsr_nzcv, fpscr + ITE(vs) movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm/unordsf2vfp.S b/lib/libcompiler_rt/arm/unordsf2vfp.S index 5ebdd3df550..0b5da2ba3e1 100644 --- a/lib/libcompiler_rt/arm/unordsf2vfp.S +++ b/lib/libcompiler_rt/arm/unordsf2vfp.S @@ -19,10 +19,15 @@ .syntax unified .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else vmov s14, r0 // move from GPR 0 to float register vmov s15, r1 // move from GPR 1 to float register vcmp.f32 s14, s15 +#endif vmrs apsr_nzcv, fpscr + ITE(vs) movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 bx lr diff --git a/lib/libcompiler_rt/arm64/Makefile.mk b/lib/libcompiler_rt/arm64/Makefile.mk deleted file mode 100644 index 7f7e3866130..00000000000 --- a/lib/libcompiler_rt/arm64/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := arm64 - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/armv6m/Makefile.mk b/lib/libcompiler_rt/armv6m/Makefile.mk deleted file mode 100644 index f3c1807f01b..00000000000 --- a/lib/libcompiler_rt/armv6m/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := armv6m - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/ashldi3.c b/lib/libcompiler_rt/ashldi3.c index eb4698ac517..fcb0abdb1fc 100644 --- a/lib/libcompiler_rt/ashldi3.c +++ b/lib/libcompiler_rt/ashldi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(llsl, ashldi3) - COMPILER_RT_ABI di_int __ashldi3(di_int a, si_int b) { @@ -41,3 +39,10 @@ __ashldi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) { + return __ashldi3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/ashrdi3.c b/lib/libcompiler_rt/ashrdi3.c index 14c878bb779..b4ab4c617ba 100644 --- a/lib/libcompiler_rt/ashrdi3.c +++ b/lib/libcompiler_rt/ashrdi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(lasr, ashrdi3) - COMPILER_RT_ABI di_int __ashrdi3(di_int a, si_int b) { @@ -42,3 +40,10 @@ __ashrdi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) { + return __ashrdi3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/assembly.h b/lib/libcompiler_rt/assembly.h index 5fc74f68f60..b15da52345c 100644 --- a/lib/libcompiler_rt/assembly.h +++ b/lib/libcompiler_rt/assembly.h @@ -44,7 +44,8 @@ #endif #define CONST_SECTION .section .rodata -#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__) +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits #else #define NO_EXEC_STACK_DIRECTIVE @@ -70,7 +71,7 @@ #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 #define ARM_HAS_BX #endif -#if !defined(__ARM_FEATURE_CLZ) && \ +#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 && \ (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) #define __ARM_FEATURE_CLZ #endif @@ -95,9 +96,11 @@ #if __ARM_ARCH_ISA_THUMB == 2 #define IT(cond) it cond #define ITT(cond) itt cond +#define ITE(cond) ite cond #else #define IT(cond) #define ITT(cond) +#define ITE(cond) #endif #if __ARM_ARCH_ISA_THUMB == 2 @@ -149,6 +152,7 @@ #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ .globl SYMBOL_NAME(name) SEPARATOR \ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \ .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR #if defined(__ARM_EABI__) diff --git a/lib/libcompiler_rt/atomic.c b/lib/libcompiler_rt/atomic.c index f1ddc3e0c52..ee35e342eda 100644 --- a/lib/libcompiler_rt/atomic.c +++ b/lib/libcompiler_rt/atomic.c @@ -229,13 +229,20 @@ void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { // Where the size is known at compile time, the compiler may emit calls to // specialised versions of the above functions. //////////////////////////////////////////////////////////////////////////////// +#ifdef __SIZEOF_INT128__ #define OPTIMISED_CASES\ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\ - /* FIXME: __uint128_t isn't available on 32 bit platforms. - OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\ + OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t) +#else +#define OPTIMISED_CASES\ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) +#endif #define OPTIMISED_CASE(n, lockfree, type)\ type __atomic_load_##n(type *src, int model) {\ diff --git a/lib/libcompiler_rt/bswapdi2.c b/lib/libcompiler_rt/bswapdi2.c new file mode 100644 index 00000000000..eb220007bb1 --- /dev/null +++ b/lib/libcompiler_rt/bswapdi2.c @@ -0,0 +1,27 @@ +/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __bswapdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { + return ( + (((u)&0xff00000000000000ULL) >> 56) | + (((u)&0x00ff000000000000ULL) >> 40) | + (((u)&0x0000ff0000000000ULL) >> 24) | + (((u)&0x000000ff00000000ULL) >> 8) | + (((u)&0x00000000ff000000ULL) << 8) | + (((u)&0x0000000000ff0000ULL) << 24) | + (((u)&0x000000000000ff00ULL) << 40) | + (((u)&0x00000000000000ffULL) << 56)); +} diff --git a/lib/libcompiler_rt/bswapsi2.c b/lib/libcompiler_rt/bswapsi2.c new file mode 100644 index 00000000000..5d941e69f7c --- /dev/null +++ b/lib/libcompiler_rt/bswapsi2.c @@ -0,0 +1,23 @@ +/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __bswapsi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { + return ( + (((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); +} diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c index 55bbdd37589..25570fc2157 100644 --- a/lib/libcompiler_rt/clear_cache.c +++ b/lib/libcompiler_rt/clear_cache.c @@ -82,10 +82,6 @@ uintptr_t GetCurrentProcess(void); #endif #endif -#if defined(__linux__) && defined(__arm__) - #include -#endif - /* * The compiler generates calls to __clear_cache() when creating * trampoline functions on the stack for use with nested functions. @@ -94,7 +90,7 @@ uintptr_t GetCurrentProcess(void); */ void __clear_cache(void *start, void *end) { -#if __i386__ || __x86_64__ +#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) /* * Intel processors have a unified instruction and data cache * so there is nothing to do @@ -108,12 +104,23 @@ void __clear_cache(void *start, void *end) { sysarch(ARM_SYNC_ICACHE, &arg); #elif defined(__linux__) + /* + * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but + * it also brought many other unused defines, as well as a dependency on + * kernel headers to be installed. + * + * This value is stable at least since Linux 3.13 and should remain so for + * compatibility reasons, warranting it's re-definition here. + */ + #define __ARM_NR_cacheflush 0x0f0002 register int start_reg __asm("r0") = (int) (intptr_t) start; const register int end_reg __asm("r1") = (int) (intptr_t) end; + const register int flags __asm("r2") = 0; const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; __asm __volatile("svc 0x0" : "=r"(start_reg) - : "r"(syscall_nr), "r"(start_reg), "r"(end_reg)); + : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), + "r"(flags)); if (start_reg != 0) { compilerrt_abort(); } diff --git a/lib/libcompiler_rt/comparedf2.c b/lib/libcompiler_rt/comparedf2.c index 9e29752231e..c5bb169d002 100644 --- a/lib/libcompiler_rt/comparedf2.c +++ b/lib/libcompiler_rt/comparedf2.c @@ -113,8 +113,6 @@ __gedf2(fp_t a, fp_t b) { } } -ARM_EABI_FNALIAS(dcmpun, unorddf2) - COMPILER_RT_ABI int __unorddf2(fp_t a, fp_t b) { const rep_t aAbs = toRep(a) & absMask; @@ -144,3 +142,9 @@ __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); } +#if defined(__ARM_EABI__) +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { + return __unorddf2(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/comparesf2.c b/lib/libcompiler_rt/comparesf2.c index 1fd50636aba..4badb5e1b9f 100644 --- a/lib/libcompiler_rt/comparesf2.c +++ b/lib/libcompiler_rt/comparesf2.c @@ -113,8 +113,6 @@ __gesf2(fp_t a, fp_t b) { } } -ARM_EABI_FNALIAS(fcmpun, unordsf2) - COMPILER_RT_ABI int __unordsf2(fp_t a, fp_t b) { const rep_t aAbs = toRep(a) & absMask; @@ -143,3 +141,10 @@ COMPILER_RT_ABI enum GE_RESULT __gtsf2(fp_t a, fp_t b) { return __gesf2(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { + return __unordsf2(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c index 9a3737020a4..83ea7a49faf 100644 --- a/lib/libcompiler_rt/cpu_model.c +++ b/lib/libcompiler_rt/cpu_model.c @@ -27,6 +27,10 @@ #include #endif +#ifndef __has_attribute +#define __has_attribute(attr) 0 +#endif + enum VendorSignatures { SIG_INTEL = 0x756e6547 /* Genu */, SIG_AMD = 0x68747541 /* Auth */ @@ -40,29 +44,16 @@ enum ProcessorVendors { }; enum ProcessorTypes { - INTEL_ATOM = 1, + INTEL_BONNELL = 1, INTEL_CORE2, INTEL_COREI7, AMDFAM10H, AMDFAM15H, - INTEL_i386, - INTEL_i486, - INTEL_PENTIUM, - INTEL_PENTIUM_PRO, - INTEL_PENTIUM_II, - INTEL_PENTIUM_III, - INTEL_PENTIUM_IV, - INTEL_PENTIUM_M, - INTEL_CORE_DUO, - INTEL_XEONPHI, - INTEL_X86_64, - INTEL_NOCONA, - INTEL_PRESCOTT, - AMD_i486, - AMDPENTIUM, - AMDATHLON, - AMDFAM14H, - AMDFAM16H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, CPU_TYPE_MAX }; @@ -75,32 +66,14 @@ enum ProcessorSubtypes { AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, - INTEL_PENTIUM_MMX, - INTEL_CORE2_65, - INTEL_CORE2_45, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, INTEL_COREI7_IVYBRIDGE, INTEL_COREI7_HASWELL, INTEL_COREI7_BROADWELL, INTEL_COREI7_SKYLAKE, INTEL_COREI7_SKYLAKE_AVX512, - INTEL_ATOM_BONNELL, - INTEL_ATOM_SILVERMONT, - INTEL_KNIGHTS_LANDING, - AMDPENTIUM_K6, - AMDPENTIUM_K62, - AMDPENTIUM_K63, - AMDPENTIUM_GEODE, - AMDATHLON_TBIRD, - AMDATHLON_MP, - AMDATHLON_XP, - AMDATHLON_K8SSE3, - AMDATHLON_OPTERON, - AMDATHLON_FX, - AMDATHLON_64, - AMD_BTVER1, - AMD_BTVER2, - AMDFAM15H_BDVER3, - AMDFAM15H_BDVER4, CPU_SUBTYPE_MAX }; @@ -116,11 +89,26 @@ enum ProcessorFeatures { FEATURE_SSE4_2, FEATURE_AVX, FEATURE_AVX2, - FEATURE_AVX512, - FEATURE_AVX512SAVE, - FEATURE_MOVBE, - FEATURE_ADX, - FEATURE_EM64T + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ }; // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). @@ -160,26 +148,27 @@ static bool isCpuIdSupported() { /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in /// the specified arguments. If we can't run cpuid on the host, return true. -static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, +static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); -// pedantic #else returns to appease -Wunreachable-code (so we don't generate -// postprocessed code that looks like "return true; return false;") + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. @@ -189,19 +178,20 @@ static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; + return false; #else - assert(0 && "This method is defined only for GNUC, Clang or MSVC."); + return true; #endif } /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return /// the 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. -static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, +static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" @@ -209,42 +199,27 @@ static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); -#elif defined(_MSC_VER) - int registers[4]; - __cpuidex(registers, value, subleaf); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; -#else - assert(0 && "This method is defined only for GNUC, Clang or MSVC."); -#endif -#elif defined(__i386__) || defined(_M_IX86) -#if defined(__GNUC__) || defined(__clang__) + return false; +#elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); -#elif defined(_MSC_VER) - __asm { - mov eax,value - mov ecx,subleaf - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } + return false; #else - assert(0 && "This method is defined only for GNUC, Clang or MSVC."); + return true; #endif +#elif defined(_MSC_VER) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif } @@ -279,84 +254,15 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static void getIntelProcessorTypeAndSubtype(unsigned int Family, - unsigned int Model, - unsigned int Brand_id, - unsigned int Features, - unsigned *Type, unsigned *Subtype) { +static void +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned *Type, unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { - case 3: - *Type = INTEL_i386; - break; - case 4: - switch (Model) { - case 0: // Intel486 DX processors - case 1: // Intel486 DX processors - case 2: // Intel486 SX processors - case 3: // Intel487 processors, IntelDX2 OverDrive processors, - // IntelDX2 processors - case 4: // Intel486 SL processor - case 5: // IntelSX2 processors - case 7: // Write-Back Enhanced IntelDX2 processors - case 8: // IntelDX4 OverDrive processors, IntelDX4 processors - default: - *Type = INTEL_i486; - break; - } - case 5: - switch (Model) { - case 1: // Pentium OverDrive processor for Pentium processor (60, 66), - // Pentium processors (60, 66) - case 2: // Pentium OverDrive processor for Pentium processor (75, 90, - // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133, - // 150, 166, 200) - case 3: // Pentium OverDrive processors for Intel486 processor-based - // systems - *Type = INTEL_PENTIUM; - break; - case 4: // Pentium OverDrive processor with MMX technology for Pentium - // processor (75, 90, 100, 120, 133), Pentium processor with - // MMX technology (166, 200) - *Type = INTEL_PENTIUM; - *Subtype = INTEL_PENTIUM_MMX; - break; - default: - *Type = INTEL_PENTIUM; - break; - } case 6: switch (Model) { - case 0x01: // Pentium Pro processor - *Type = INTEL_PENTIUM_PRO; - break; - case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, - // model 03 - case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, - // model 05, and Intel Celeron processor, model 05 - case 0x06: // Celeron processor, model 06 - *Type = INTEL_PENTIUM_II; - break; - case 0x07: // Pentium III processor, model 07, and Pentium III Xeon - // processor, model 07 - case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, - // model 08, and Celeron processor, model 08 - case 0x0a: // Pentium III Xeon processor, model 0Ah - case 0x0b: // Pentium III processor, model 0Bh - *Type = INTEL_PENTIUM_III; - break; - case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. - case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model - // 0Dh. All processors are manufactured using the 90 nm process. - case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 - // Integrated Processor with Intel QuickAssist Technology - *Type = INTEL_PENTIUM_M; - break; - case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model - // 0Eh. All processors are manufactured using the 65 nm process. - *Type = INTEL_CORE_DUO; - break; // yonah case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel @@ -364,9 +270,6 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family, // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process - *Type = INTEL_CORE2; // "core2" - *Subtype = INTEL_CORE2_65; - break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // @@ -374,14 +277,13 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family, case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. *Type = INTEL_CORE2; // "penryn" - *Subtype = INTEL_CORE2_45; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. case 0x1f: - case 0x2e: // Nehalem EX + case 0x2e: // Nehalem EX *Type = INTEL_COREI7; // "nehalem" *Subtype = INTEL_COREI7_NEHALEM; break; @@ -399,7 +301,7 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family, *Subtype = INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: - case 0x3e: // Ivy Bridge EP + case 0x3e: // Ivy Bridge EP *Type = INTEL_COREI7; // "ivybridge" *Subtype = INTEL_COREI7_IVYBRIDGE; break; @@ -423,22 +325,26 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family, break; // Skylake: - case 0x4e: - *Type = INTEL_COREI7; // "skylake-avx512" - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; - break; - case 0x5e: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop *Type = INTEL_COREI7; // "skylake" *Subtype = INTEL_COREI7_SKYLAKE; break; + // Skylake Xeon: + case 0x55: + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + break; + case 0x1c: // Most 45 nm Intel Atom processors case 0x26: // 45 nm Atom Lincroft case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; + *Type = INTEL_BONNELL; break; // "bonnell" // Atom Silvermont codes from the Intel software optimization guide. @@ -448,185 +354,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned int Family, case 0x5a: case 0x5d: case 0x4c: // really airmont - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + *Type = INTEL_SILVERMONT; break; // "silvermont" case 0x57: - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + *Type = INTEL_KNL; // knl break; - default: // Unknown family 6 CPU, try to guess. - if (Features & (1 << FEATURE_AVX512)) { - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; - break; - } - if (Features & (1 << FEATURE_ADX)) { - *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_BROADWELL; - break; - } - if (Features & (1 << FEATURE_AVX2)) { - *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_HASWELL; - break; - } - if (Features & (1 << FEATURE_AVX)) { - *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_SANDYBRIDGE; - break; - } - if (Features & (1 << FEATURE_SSE4_2)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; - } else { - *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_NEHALEM; - } - break; - } - if (Features & (1 << FEATURE_SSE4_1)) { - *Type = INTEL_CORE2; // "penryn" - *Subtype = INTEL_CORE2_45; - break; - } - if (Features & (1 << FEATURE_SSSE3)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; // "bonnell" - } else { - *Type = INTEL_CORE2; // "core2" - *Subtype = INTEL_CORE2_65; - } - break; - } - if (Features & (1 << FEATURE_EM64T)) { - *Type = INTEL_X86_64; - break; // x86-64 - } - if (Features & (1 << FEATURE_SSE2)) { - *Type = INTEL_PENTIUM_M; - break; - } - if (Features & (1 << FEATURE_SSE)) { - *Type = INTEL_PENTIUM_III; - break; - } - if (Features & (1 << FEATURE_MMX)) { - *Type = INTEL_PENTIUM_II; - break; - } - *Type = INTEL_PENTIUM_PRO; - break; - } - case 15: { - switch (Model) { - case 0: // Pentium 4 processor, Intel Xeon processor. All processors are - // model 00h and manufactured using the 0.18 micron process. - case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon - // processor MP, and Intel Celeron processor. All processors are - // model 01h and manufactured using the 0.18 micron process. - case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M, - // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron - // processor, and Mobile Intel Celeron processor. All processors - // are model 02h and manufactured using the 0.13 micron process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); - break; - - case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D - // processor. All processors are model 03h and manufactured using - // the 90 nm process. - case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, - // Pentium D processor, Intel Xeon processor, Intel Xeon - // processor MP, Intel Celeron D processor. All processors are - // model 04h and manufactured using the 90 nm process. - case 6: // Pentium 4 processor, Pentium D processor, Pentium processor - // Extreme Edition, Intel Xeon processor, Intel Xeon processor - // MP, Intel Celeron D processor. All processors are model 06h - // and manufactured using the 65 nm process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); - break; - - default: - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + default: // Unknown family 6 CPU. break; + break; } - } default: - break; /*"generic"*/ + break; // Unknown. } } -static void getAMDProcessorTypeAndSubtype(unsigned int Family, - unsigned int Model, - unsigned int Features, unsigned *Type, +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, unsigned *Subtype) { // FIXME: this poorly matches the generated SubtargetFeatureKV table. There // appears to be no way to generate the wide variety of AMD-specific targets // from the information returned from CPUID. switch (Family) { - case 4: - *Type = AMD_i486; - case 5: - *Type = AMDPENTIUM; - switch (Model) { - case 6: - case 7: - *Subtype = AMDPENTIUM_K6; - break; // "k6" - case 8: - *Subtype = AMDPENTIUM_K62; - break; // "k6-2" - case 9: - case 13: - *Subtype = AMDPENTIUM_K63; - break; // "k6-3" - case 10: - *Subtype = AMDPENTIUM_GEODE; - break; // "geode" - default: - break; - } - case 6: - *Type = AMDATHLON; - switch (Model) { - case 4: - *Subtype = AMDATHLON_TBIRD; - break; // "athlon-tbird" - case 6: - case 7: - case 8: - *Subtype = AMDATHLON_MP; - break; // "athlon-mp" - case 10: - *Subtype = AMDATHLON_XP; - break; // "athlon-xp" - default: - break; - } - case 15: - *Type = AMDATHLON; - if (Features & (1 << FEATURE_SSE3)) { - *Subtype = AMDATHLON_K8SSE3; - break; // "k8-sse3" - } - switch (Model) { - case 1: - *Subtype = AMDATHLON_OPTERON; - break; // "opteron" - case 5: - *Subtype = AMDATHLON_FX; - break; // "athlon-fx"; also opteron - default: - *Subtype = AMDATHLON_64; - break; // "athlon64" - } case 16: *Type = AMDFAM10H; // "amdfam10" switch (Model) { @@ -639,23 +389,16 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, case 8: *Subtype = AMDFAM10H_ISTANBUL; break; - default: - break; } + break; case 20: - *Type = AMDFAM14H; - *Subtype = AMD_BTVER1; + *Type = AMD_BTVER1; break; // "btver1"; case 21: *Type = AMDFAM15H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. - *Subtype = AMD_BTVER1; - break; // "btver1" - } - if (Model >= 0x50 && Model <= 0x6f) { + if (Model >= 0x60 && Model <= 0x7f) { *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 50h-6Fh: Excavator + break; // "bdver4"; 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { *Subtype = AMDFAM15H_BDVER3; @@ -671,31 +414,47 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 22: - *Type = AMDFAM16H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. - *Subtype = AMD_BTVER1; - break; // "btver1"; - } - *Subtype = AMD_BTVER2; + *Type = AMD_BTVER2; break; // "btver2" + case 23: + *Type = AMDFAM17H; + *Subtype = AMDFAM17H_ZNVER1; + break; default: break; // "generic" } } -static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, - unsigned MaxLeaf) { +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut) { unsigned Features = 0; - unsigned int EAX, EBX; - Features |= (((EDX >> 23) & 1) << FEATURE_MMX); - Features |= (((EDX >> 25) & 1) << FEATURE_SSE); - Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); - Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); - Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); - Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); - Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); - Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); + unsigned EAX, EBX; + + if ((EDX >> 15) & 1) + Features |= 1 << FEATURE_CMOV; + if ((EDX >> 23) & 1) + Features |= 1 << FEATURE_MMX; + if ((EDX >> 25) & 1) + Features |= 1 << FEATURE_SSE; + if ((EDX >> 26) & 1) + Features |= 1 << FEATURE_SSE2; + + if ((ECX >> 0) & 1) + Features |= 1 << FEATURE_SSE3; + if ((ECX >> 1) & 1) + Features |= 1 << FEATURE_PCLMUL; + if ((ECX >> 9) & 1) + Features |= 1 << FEATURE_SSSE3; + if ((ECX >> 12) & 1) + Features |= 1 << FEATURE_FMA; + if ((ECX >> 19) & 1) + Features |= 1 << FEATURE_SSE4_1; + if ((ECX >> 20) & 1) + Features |= 1 << FEATURE_SSE4_2; + if ((ECX >> 23) & 1) + Features |= 1 << FEATURE_POPCNT; + if ((ECX >> 25) & 1) + Features |= 1 << FEATURE_AES; // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context @@ -704,30 +463,72 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); - bool HasLeaf7 = MaxLeaf >= 0x7; - getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); - bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); - bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); - bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); - Features |= (HasAVX << FEATURE_AVX); - Features |= (HasAVX2 << FEATURE_AVX2); - Features |= (HasAVX512 << FEATURE_AVX512); - Features |= (HasAVX512Save << FEATURE_AVX512SAVE); - Features |= (HasADX << FEATURE_ADX); - - getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); - return Features; + + if (HasAVX) + Features |= 1 << FEATURE_AVX; + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7 && ((EBX >> 3) & 1)) + Features |= 1 << FEATURE_BMI; + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + Features |= 1 << FEATURE_AVX2; + if (HasLeaf7 && ((EBX >> 9) & 1)) + Features |= 1 << FEATURE_BMI2; + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512F; + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512DQ; + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512IFMA; + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512PF; + if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512ER; + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512CD; + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512BW; + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VL; + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VBMI; + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VPOPCNTDQ; + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124VNNIW; + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124FMAPS; + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + Features |= 1 << FEATURE_SSE4_A; + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + Features |= 1 << FEATURE_XOP; + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + Features |= 1 << FEATURE_FMA4; + + *FeaturesOut = Features; } -#ifdef HAVE_INIT_PRIORITY -#define CONSTRUCTOR_PRIORITY (101) +#if defined(HAVE_INIT_PRIORITY) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101)) +#elif __has_attribute(__constructor__) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) #else -#define CONSTRUCTOR_PRIORITY +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE #endif -int __cpu_indicator_init(void) - __attribute__((constructor CONSTRUCTOR_PRIORITY)); +int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; struct __processor_model { unsigned int __cpu_vendor; @@ -742,13 +543,13 @@ struct __processor_model { the priority set. However, it still runs after ifunc initializers and needs to be called explicitly there. */ -int __attribute__((constructor CONSTRUCTOR_PRIORITY)) +int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { - unsigned int EAX, EBX, ECX, EDX; - unsigned int MaxLeaf = 5; - unsigned int Vendor; - unsigned int Model, Family, Brand_id; - unsigned int Features = 0; + unsigned EAX, EBX, ECX, EDX; + unsigned MaxLeaf = 5; + unsigned Vendor; + unsigned Model, Family, Brand_id; + unsigned Features = 0; /* This function needs to run just once. */ if (__cpu_model.__cpu_vendor) @@ -758,9 +559,7 @@ __cpu_indicator_init(void) { return -1; /* Assume cpuid insn present. Run in level 0 to get vendor id. */ - getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX); - - if (MaxLeaf < 1) { + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { __cpu_model.__cpu_vendor = VENDOR_OTHER; return -1; } @@ -769,7 +568,7 @@ __cpu_indicator_init(void) { Brand_id = EBX & 0xff; /* Find available features. */ - Features = getAvailableFeatures(ECX, EDX, MaxLeaf); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features); __cpu_model.__cpu_features[0] = Features; if (Vendor == SIG_INTEL) { diff --git a/lib/libcompiler_rt/divdf3.c b/lib/libcompiler_rt/divdf3.c index ab44c2b25fe..492e32b851e 100644 --- a/lib/libcompiler_rt/divdf3.c +++ b/lib/libcompiler_rt/divdf3.c @@ -19,8 +19,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(ddiv, divdf3) - COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { @@ -183,3 +181,10 @@ __divdf3(fp_t a, fp_t b) { return result; } } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { + return __divdf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/divsf3.c b/lib/libcompiler_rt/divsf3.c index de2e376125b..aa6289a6d70 100644 --- a/lib/libcompiler_rt/divsf3.c +++ b/lib/libcompiler_rt/divsf3.c @@ -19,8 +19,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fdiv, divsf3) - COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { @@ -167,3 +165,10 @@ __divsf3(fp_t a, fp_t b) { return fromRep(absResult | quotientSign); } } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { + return __divsf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/divsi3.c b/lib/libcompiler_rt/divsi3.c index bab4aefda30..3852e3990b5 100644 --- a/lib/libcompiler_rt/divsi3.c +++ b/lib/libcompiler_rt/divsi3.c @@ -16,8 +16,6 @@ /* Returns: a / b */ -ARM_EABI_FNALIAS(idiv, divsi3) - COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) { @@ -35,3 +33,10 @@ __divsi3(si_int a, si_int b) */ return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) { + return __divsi3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/divtc3.c b/lib/libcompiler_rt/divtc3.c index 04693df471f..16e538ba4a3 100644 --- a/lib/libcompiler_rt/divtc3.c +++ b/lib/libcompiler_rt/divtc3.c @@ -17,7 +17,7 @@ /* Returns: the quotient of (a + ib) / (c + id) */ -COMPILER_RT_ABI long double _Complex +COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b, long double __c, long double __d) { int __ilogbw = 0; @@ -29,31 +29,31 @@ __divtc3(long double __a, long double __b, long double __c, long double __d) __d = crt_scalbnl(__d, -__ilogbw); } long double __denom = __c * __c + __d * __d; - long double _Complex z; - __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); - __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { - __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a; - __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b; + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && crt_isfinite(__d)) { __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); - __real__ z = CRT_INFINITY * (__a * __c + __b * __d); - __imag__ z = CRT_INFINITY * (__b * __c - __a * __d); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) && crt_isfinite(__b)) { __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); - __real__ z = 0.0 * (__a * __c + __b * __d); - __imag__ z = 0.0 * (__b * __c - __a * __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); } } return z; diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c index eccbf53366e..12aad3a42b7 100644 --- a/lib/libcompiler_rt/emutls.c +++ b/lib/libcompiler_rt/emutls.c @@ -7,7 +7,6 @@ * * ===----------------------------------------------------------------------=== */ -#include #include #include #include @@ -15,6 +14,23 @@ #include "int_lib.h" #include "int_util.h" +typedef struct emutls_address_array { + uintptr_t size; /* number of elements in the 'data' array */ + void* data[]; +} emutls_address_array; + +static void emutls_shutdown(emutls_address_array *array); + +#ifndef _WIN32 + +#include + +static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_key_t emutls_pthread_key; + +typedef unsigned int gcc_word __attribute__((mode(word))); +typedef unsigned int gcc_pointer __attribute__((mode(pointer))); + /* Default is not to use posix_memalign, so systems like Android * can use thread local data without heavier POSIX memory allocators. */ @@ -22,26 +38,6 @@ #define EMUTLS_USE_POSIX_MEMALIGN 0 #endif -/* For every TLS variable xyz, - * there is one __emutls_control variable named __emutls_v.xyz. - * If xyz has non-zero initial value, __emutls_v.xyz's "value" - * will point to __emutls_t.xyz, which has the initial value. - */ -typedef unsigned int gcc_word __attribute__((mode(word))); -typedef struct __emutls_control { - /* Must use gcc_word here, instead of size_t, to match GCC. When - gcc_word is larger than size_t, the upper extra bits are all - zeros. We can use variables of size_t to operate on size and - align. */ - gcc_word size; /* size of the object in bytes */ - gcc_word align; /* alignment of the object in bytes */ - union { - uintptr_t index; /* data[index-1] is the object address */ - void* address; /* object address, when in single thread env */ - } object; - void* value; /* null or non-zero initial value for the object */ -} __emutls_control; - static __inline void *emutls_memalign_alloc(size_t align, size_t size) { void *base; #if EMUTLS_USE_POSIX_MEMALIGN @@ -50,7 +46,7 @@ static __inline void *emutls_memalign_alloc(size_t align, size_t size) { #else #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) char* object; - if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) abort(); base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & ~(uintptr_t)(align - 1)); @@ -69,10 +65,207 @@ static __inline void emutls_memalign_free(void *base) { #endif } +static void emutls_key_destructor(void* ptr) { + emutls_shutdown((emutls_address_array*)ptr); + free(ptr); +} + +static __inline void emutls_init(void) { + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); +} + +static __inline void emutls_init_once(void) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); +} + +static __inline void emutls_lock() { + pthread_mutex_lock(&emutls_mutex); +} + +static __inline void emutls_unlock() { + pthread_mutex_unlock(&emutls_mutex); +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + pthread_setspecific(emutls_pthread_key, (void*) value); +} + +static __inline emutls_address_array* emutls_getspecific() { + return (emutls_address_array*) pthread_getspecific(emutls_pthread_key); +} + +#else + +#include +#include +#include +#include +#include + +static LPCRITICAL_SECTION emutls_mutex; +static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; + +typedef uintptr_t gcc_word; +typedef void * gcc_pointer; + +static void win_error(DWORD last_err, const char *hint) { + char *buffer = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { + fprintf(stderr, "Windows error: %s\n", buffer); + } else { + fprintf(stderr, "Unkown Windows error: %s\n", hint); + } + LocalFree(buffer); +} + +static __inline void win_abort(DWORD last_err, const char *hint) { + win_error(last_err, hint); + abort(); +} + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base = _aligned_malloc(size, align); + if (!base) + win_abort(GetLastError(), "_aligned_malloc"); + return base; +} + +static __inline void emutls_memalign_free(void *base) { + _aligned_free(base); +} + +static void emutls_exit(void) { + if (emutls_mutex) { + DeleteCriticalSection(emutls_mutex); + _aligned_free(emutls_mutex); + emutls_mutex = NULL; + } + if (emutls_tls_index != TLS_OUT_OF_INDEXES) { + emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index)); + TlsFree(emutls_tls_index); + emutls_tls_index = TLS_OUT_OF_INDEXES; + } +} + +#pragma warning (push) +#pragma warning (disable : 4100) +static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { + emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); + if (!emutls_mutex) { + win_error(GetLastError(), "_aligned_malloc"); + return FALSE; + } + InitializeCriticalSection(emutls_mutex); + + emutls_tls_index = TlsAlloc(); + if (emutls_tls_index == TLS_OUT_OF_INDEXES) { + emutls_exit(); + win_error(GetLastError(), "TlsAlloc"); + return FALSE; + } + atexit(&emutls_exit); + return TRUE; +} + +static __inline void emutls_init_once(void) { + static INIT_ONCE once; + InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); +} + +static __inline void emutls_lock() { + EnterCriticalSection(emutls_mutex); +} + +static __inline void emutls_unlock() { + LeaveCriticalSection(emutls_mutex); +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0) + win_abort(GetLastError(), "TlsSetValue"); +} + +static __inline emutls_address_array* emutls_getspecific() { + LPVOID value = TlsGetValue(emutls_tls_index); + if (value == NULL) { + const DWORD err = GetLastError(); + if (err != ERROR_SUCCESS) + win_abort(err, "TlsGetValue"); + } + return (emutls_address_array*) value; +} + +/* Provide atomic load/store functions for emutls_get_index if built with MSVC. + */ +#if !defined(__ATOMIC_RELEASE) + +enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; + +static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { + assert(type == __ATOMIC_ACQUIRE); +#ifdef _WIN64 + return (uintptr_t) _load_be_u64(ptr); +#else + return (uintptr_t) _load_be_u32(ptr); +#endif +} + +static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { + assert(type == __ATOMIC_RELEASE); +#ifdef _WIN64 + _store_be_u64(ptr, val); +#else + _store_be_u32(ptr, val); +#endif +} + +#endif + +#pragma warning (pop) + +#endif + +static size_t emutls_num_object = 0; /* number of allocated TLS objects */ + +/* Free the allocated TLS data + */ +static void emutls_shutdown(emutls_address_array *array) { + if (array) { + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); + } + } +} + +/* For every TLS variable xyz, + * there is one __emutls_control variable named __emutls_v.xyz. + * If xyz has non-zero initial value, __emutls_v.xyz's "value" + * will point to __emutls_t.xyz, which has the initial value. + */ +typedef struct __emutls_control { + /* Must use gcc_word here, instead of size_t, to match GCC. When + gcc_word is larger than size_t, the upper extra bits are all + zeros. We can use variables of size_t to operate on size and + align. */ + gcc_word size; /* size of the object in bytes */ + gcc_word align; /* alignment of the object in bytes */ + union { + uintptr_t index; /* data[index-1] is the object address */ + void* address; /* object address, when in single thread env */ + } object; + void* value; /* null or non-zero initial value for the object */ +} __emutls_control; + /* Emulated TLS objects are always allocated at run-time. */ static __inline void *emutls_allocate_object(__emutls_control *control) { /* Use standard C types, check with gcc's emutls.o. */ - typedef unsigned int gcc_pointer __attribute__((mode(pointer))); COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); @@ -93,45 +286,19 @@ static __inline void *emutls_allocate_object(__emutls_control *control) { return base; } -static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; - -static size_t emutls_num_object = 0; /* number of allocated TLS objects */ - -typedef struct emutls_address_array { - uintptr_t size; /* number of elements in the 'data' array */ - void* data[]; -} emutls_address_array; - -static pthread_key_t emutls_pthread_key; - -static void emutls_key_destructor(void* ptr) { - emutls_address_array* array = (emutls_address_array*)ptr; - uintptr_t i; - for (i = 0; i < array->size; ++i) { - if (array->data[i]) - emutls_memalign_free(array->data[i]); - } - free(ptr); -} - -static void emutls_init(void) { - if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) - abort(); -} /* Returns control->object.index; set index if not allocated yet. */ static __inline uintptr_t emutls_get_index(__emutls_control *control) { uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); if (!index) { - static pthread_once_t once = PTHREAD_ONCE_INIT; - pthread_once(&once, emutls_init); - pthread_mutex_lock(&emutls_mutex); + emutls_init_once(); + emutls_lock(); index = control->object.index; if (!index) { index = ++emutls_num_object; __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); } - pthread_mutex_unlock(&emutls_mutex); + emutls_unlock(); } return index; } @@ -142,7 +309,7 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array, if (array == NULL) abort(); array->size = size; - pthread_setspecific(emutls_pthread_key, (void*)array); + emutls_setspecific(array); } /* Returns the new 'data' array size, number of elements, @@ -156,22 +323,29 @@ static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { return ((index + 1 + 15) & ~((uintptr_t)15)) - 1; } +/* Returns the size in bytes required for an emutls_address_array with + * N number of elements for data field. + */ +static __inline uintptr_t emutls_asize(uintptr_t N) { + return N * sizeof(void *) + sizeof(emutls_address_array); +} + /* Returns the thread local emutls_address_array. * Extends its size if necessary to hold address at index. */ static __inline emutls_address_array * emutls_get_address_array(uintptr_t index) { - emutls_address_array* array = pthread_getspecific(emutls_pthread_key); + emutls_address_array* array = emutls_getspecific(); if (array == NULL) { uintptr_t new_size = emutls_new_data_array_size(index); - array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array)); + array = (emutls_address_array*) malloc(emutls_asize(new_size)); if (array) memset(array->data, 0, new_size * sizeof(void*)); emutls_check_array_set_size(array, new_size); } else if (index > array->size) { uintptr_t orig_size = array->size; uintptr_t new_size = emutls_new_data_array_size(index); - array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array)); + array = (emutls_address_array*) realloc(array, emutls_asize(new_size)); if (array) memset(array->data + orig_size, 0, (new_size - orig_size) * sizeof(void*)); @@ -182,8 +356,8 @@ emutls_get_address_array(uintptr_t index) { void* __emutls_get_address(__emutls_control* control) { uintptr_t index = emutls_get_index(control); - emutls_address_array* array = emutls_get_address_array(index); - if (array->data[index - 1] == NULL) - array->data[index - 1] = emutls_allocate_object(control); - return array->data[index - 1]; + emutls_address_array* array = emutls_get_address_array(index--); + if (array->data[index] == NULL) + array->data[index] = emutls_allocate_object(control); + return array->data[index]; } diff --git a/lib/libcompiler_rt/extendhfsf2.c b/lib/libcompiler_rt/extendhfsf2.c index 27115a48c18..e7d9fde8abf 100644 --- a/lib/libcompiler_rt/extendhfsf2.c +++ b/lib/libcompiler_rt/extendhfsf2.c @@ -12,8 +12,6 @@ #define DST_SINGLE #include "fp_extend_impl.inc" -ARM_EABI_FNALIAS(h2f, extendhfsf2) - // Use a forwarding definition and noinline to implement a poor man's alias, // as there isn't a good cross-platform way of defining one. COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { @@ -23,3 +21,10 @@ COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { return __extendhfsf2(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_h2f(uint16_t a) { + return __extendhfsf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/extendsfdf2.c b/lib/libcompiler_rt/extendsfdf2.c index 7a267c2f47a..b9e7a7471a9 100644 --- a/lib/libcompiler_rt/extendsfdf2.c +++ b/lib/libcompiler_rt/extendsfdf2.c @@ -12,8 +12,13 @@ #define DST_DOUBLE #include "fp_extend_impl.inc" -ARM_EABI_FNALIAS(f2d, extendsfdf2) - COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_f2d(float a) { + return __extendsfdf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/ffssi2.c b/lib/libcompiler_rt/ffssi2.c new file mode 100644 index 00000000000..e5180eff5e0 --- /dev/null +++ b/lib/libcompiler_rt/ffssi2.c @@ -0,0 +1,29 @@ +/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ffssi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the index of the least significant 1-bit in a, or + * the value zero if a is zero. The least significant bit is index one. + */ + +COMPILER_RT_ABI si_int +__ffssi2(si_int a) +{ + if (a == 0) + { + return 0; + } + return __builtin_ctz(a) + 1; +} diff --git a/lib/libcompiler_rt/fixdfdi.c b/lib/libcompiler_rt/fixdfdi.c index 14283ef42e6..31d76df2825 100644 --- a/lib/libcompiler_rt/fixdfdi.c +++ b/lib/libcompiler_rt/fixdfdi.c @@ -10,7 +10,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(d2lz, fixdfdi) #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid @@ -44,3 +43,15 @@ __fixdfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int +#if defined(__SOFT_FP__) +__aeabi_d2lz(fp_t a) { +#else +__aeabi_d2lz(double a) { +#endif + return __fixdfdi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixdfsi.c b/lib/libcompiler_rt/fixdfsi.c index 704e65bc43a..fc316dcd054 100644 --- a/lib/libcompiler_rt/fixdfsi.c +++ b/lib/libcompiler_rt/fixdfsi.c @@ -14,9 +14,14 @@ typedef si_int fixint_t; typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -ARM_EABI_FNALIAS(d2iz, fixdfsi) - COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { + return __fixdfsi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixsfdi.c b/lib/libcompiler_rt/fixsfdi.c index fab47e272a2..c43473637d6 100644 --- a/lib/libcompiler_rt/fixsfdi.c +++ b/lib/libcompiler_rt/fixsfdi.c @@ -11,8 +11,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(f2lz, fixsfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. @@ -45,3 +43,15 @@ __fixsfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int +#if defined(__SOFT_FP__) +__aeabi_f2lz(fp_t a) { +#else +__aeabi_f2lz(float a) { +#endif + return __fixsfdi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixsfsi.c b/lib/libcompiler_rt/fixsfsi.c index f045536d685..3276df96646 100644 --- a/lib/libcompiler_rt/fixsfsi.c +++ b/lib/libcompiler_rt/fixsfsi.c @@ -14,9 +14,14 @@ typedef si_int fixint_t; typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -ARM_EABI_FNALIAS(f2iz, fixsfsi) - COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { + return __fixsfsi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixunsdfdi.c b/lib/libcompiler_rt/fixunsdfdi.c index 4b0bc9e1d05..b734409709b 100644 --- a/lib/libcompiler_rt/fixunsdfdi.c +++ b/lib/libcompiler_rt/fixunsdfdi.c @@ -11,8 +11,6 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(d2ulz, fixunsdfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. @@ -42,3 +40,15 @@ __fixunsdfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI du_int +#if defined(__SOFT_FP__) +__aeabi_d2ulz(fp_t a) { +#else +__aeabi_d2ulz(double a) { +#endif + return __fixunsdfdi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixunsdfsi.c b/lib/libcompiler_rt/fixunsdfsi.c index 232d342d77d..bb3d8e0f831 100644 --- a/lib/libcompiler_rt/fixunsdfsi.c +++ b/lib/libcompiler_rt/fixunsdfsi.c @@ -13,9 +13,14 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -ARM_EABI_FNALIAS(d2uiz, fixunsdfsi) - COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { + return __fixunsdfsi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixunssfdi.c b/lib/libcompiler_rt/fixunssfdi.c index f8ebab854f9..5d92245df0d 100644 --- a/lib/libcompiler_rt/fixunssfdi.c +++ b/lib/libcompiler_rt/fixunssfdi.c @@ -11,8 +11,6 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(f2ulz, fixunssfdi) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; can set the invalid * flag as a side-effect of computation. @@ -43,3 +41,15 @@ __fixunssfdi(fp_t a) { } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI du_int +#if defined(__SOFT_FP__) +__aeabi_f2ulz(fp_t a) { +#else +__aeabi_f2ulz(float a) { +#endif + return __fixunssfdi(a); +} +#endif + diff --git a/lib/libcompiler_rt/fixunssfsi.c b/lib/libcompiler_rt/fixunssfsi.c index cc2b05bd84f..91d5e8ae5d7 100644 --- a/lib/libcompiler_rt/fixunssfsi.c +++ b/lib/libcompiler_rt/fixunssfsi.c @@ -17,9 +17,14 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -ARM_EABI_FNALIAS(f2uiz, fixunssfsi) - COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { + return __fixunssfsi(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatdidf.c b/lib/libcompiler_rt/floatdidf.c index 2b023ad08be..681fecef968 100644 --- a/lib/libcompiler_rt/floatdidf.c +++ b/lib/libcompiler_rt/floatdidf.c @@ -22,8 +22,6 @@ /* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ -ARM_EABI_FNALIAS(l2d, floatdidf) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; we'll set the inexact flag * as a side-effect of this computation. @@ -105,3 +103,10 @@ __floatdidf(di_int a) return fb.f; } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_l2d(di_int a) { + return __floatdidf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatdisf.c b/lib/libcompiler_rt/floatdisf.c index 3e47580ef57..dd548165c37 100644 --- a/lib/libcompiler_rt/floatdisf.c +++ b/lib/libcompiler_rt/floatdisf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(l2f, floatdisf) - COMPILER_RT_ABI float __floatdisf(di_int a) { @@ -78,3 +76,10 @@ __floatdisf(di_int a) ((su_int)a & 0x007FFFFF); /* mantissa */ return fb.f; } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_l2f(di_int a) { + return __floatdisf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatsidf.c b/lib/libcompiler_rt/floatsidf.c index 1cf99b782a6..2ae395bdc1d 100644 --- a/lib/libcompiler_rt/floatsidf.c +++ b/lib/libcompiler_rt/floatsidf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(i2d, floatsidf) - COMPILER_RT_ABI fp_t __floatsidf(int a) { @@ -51,3 +49,10 @@ __floatsidf(int a) { // Insert the sign bit and return return fromRep(result | sign); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_i2d(int a) { + return __floatsidf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatsisf.c b/lib/libcompiler_rt/floatsisf.c index 467dd1d1eaf..08891fcdf20 100644 --- a/lib/libcompiler_rt/floatsisf.c +++ b/lib/libcompiler_rt/floatsisf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(i2f, floatsisf) - COMPILER_RT_ABI fp_t __floatsisf(int a) { @@ -57,3 +55,10 @@ __floatsisf(int a) { // Insert the sign bit and return return fromRep(result | sign); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_i2f(int a) { + return __floatsisf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floattitf.c b/lib/libcompiler_rt/floattitf.c new file mode 100644 index 00000000000..994fded3947 --- /dev/null +++ b/lib/libcompiler_rt/floattitf.c @@ -0,0 +1,82 @@ +//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ti_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a ti_int to a fp_t, rounding toward even. */ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * ti_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floattitf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (s & 0x8000000000000000LL) /* sign */ + | (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/lib/libcompiler_rt/floatundidf.c b/lib/libcompiler_rt/floatundidf.c index cfd3a7a3b33..6c1a931ef2f 100644 --- a/lib/libcompiler_rt/floatundidf.c +++ b/lib/libcompiler_rt/floatundidf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ul2d, floatundidf) - #ifndef __SOFT_FP__ /* Support for systems that have hardware floating-point; we'll set the inexact flag * as a side-effect of this computation. @@ -104,3 +102,10 @@ __floatundidf(du_int a) return fb.f; } #endif + +#if defined(__ARM_EABI__) +AEABI_RTABI double __aeabi_ul2d(du_int a) { + return __floatundidf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatundisf.c b/lib/libcompiler_rt/floatundisf.c index 713a44abc8b..86841a75dc6 100644 --- a/lib/libcompiler_rt/floatundisf.c +++ b/lib/libcompiler_rt/floatundisf.c @@ -22,8 +22,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ul2f, floatundisf) - COMPILER_RT_ABI float __floatundisf(du_int a) { @@ -75,3 +73,10 @@ __floatundisf(du_int a) ((su_int)a & 0x007FFFFF); /* mantissa */ return fb.f; } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_ul2f(du_int a) { + return __floatundisf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatunsidf.c b/lib/libcompiler_rt/floatunsidf.c index 445e18041c4..8d4807194f0 100644 --- a/lib/libcompiler_rt/floatunsidf.c +++ b/lib/libcompiler_rt/floatunsidf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ui2d, floatunsidf) - COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) { @@ -40,3 +38,10 @@ __floatunsidf(unsigned int a) { result += (rep_t)(exponent + exponentBias) << significandBits; return fromRep(result); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { + return __floatunsidf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatunsisf.c b/lib/libcompiler_rt/floatunsisf.c index ea6f161adc0..f194c046d2f 100644 --- a/lib/libcompiler_rt/floatunsisf.c +++ b/lib/libcompiler_rt/floatunsisf.c @@ -18,8 +18,6 @@ #include "int_lib.h" -ARM_EABI_FNALIAS(ui2f, floatunsisf) - COMPILER_RT_ABI fp_t __floatunsisf(unsigned int a) { @@ -48,3 +46,10 @@ __floatunsisf(unsigned int a) { result += (rep_t)(exponent + exponentBias) << significandBits; return fromRep(result); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { + return __floatunsisf(a); +} +#endif + diff --git a/lib/libcompiler_rt/floatuntitf.c b/lib/libcompiler_rt/floatuntitf.c new file mode 100644 index 00000000000..e2518c93f23 --- /dev/null +++ b/lib/libcompiler_rt/floatuntitf.c @@ -0,0 +1,79 @@ +//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tu_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +/* Returns: convert a tu_int to a fp_t, rounding toward even. */ + +/* Assumption: fp_t is a IEEE 128 bit floating point type + * tu_int is a 128 bit integral type + */ + +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | + * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + */ + +#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) +COMPILER_RT_ABI fp_t +__floatuntitf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/lib/libcompiler_rt/i386/Makefile.mk b/lib/libcompiler_rt/i386/Makefile.mk deleted file mode 100644 index f3776a02c0d..00000000000 --- a/lib/libcompiler_rt/i386/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := i386 - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/int_lib.h b/lib/libcompiler_rt/int_lib.h index 8dfe5672d13..9a8092d50d8 100644 --- a/lib/libcompiler_rt/int_lib.h +++ b/lib/libcompiler_rt/int_lib.h @@ -30,14 +30,17 @@ /* ABI macro definitions */ #if __ARM_EABI__ -# define ARM_EABI_FNALIAS(aeabi_name, name) \ - void __aeabi_##aeabi_name() __attribute__((alias("__" #name))); -# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) +# ifdef COMPILER_RT_ARMHF_TARGET +# define COMPILER_RT_ABI +# else +# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +# endif #else -# define ARM_EABI_FNALIAS(aeabi_name, name) # define COMPILER_RT_ABI #endif +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + #ifdef _MSC_VER #define ALWAYS_INLINE __forceinline #define NOINLINE __declspec(noinline) @@ -91,14 +94,14 @@ COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); #include uint32_t __inline __builtin_ctz(uint32_t value) { - uint32_t trailing_zero = 0; + unsigned long trailing_zero = 0; if (_BitScanForward(&trailing_zero, value)) return trailing_zero; return 32; } uint32_t __inline __builtin_clz(uint32_t value) { - uint32_t leading_zero = 0; + unsigned long leading_zero = 0; if (_BitScanReverse(&leading_zero, value)) return 31 - leading_zero; return 32; @@ -106,7 +109,7 @@ uint32_t __inline __builtin_clz(uint32_t value) { #if defined(_M_ARM) || defined(_M_X64) uint32_t __inline __builtin_clzll(uint64_t value) { - uint32_t leading_zero = 0; + unsigned long leading_zero = 0; if (_BitScanReverse64(&leading_zero, value)) return 63 - leading_zero; return 64; diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h index 660385ecd6a..a92238c5b73 100644 --- a/lib/libcompiler_rt/int_types.h +++ b/lib/libcompiler_rt/int_types.h @@ -60,9 +60,7 @@ typedef union }s; } udwords; -/* MIPS64 issue: PR 20098 */ -#if (defined(__LP64__) || defined(__wasm__)) && \ - !(defined(__mips__) && defined(__clang__)) +#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) #define CRT_HAS_128BIT #endif diff --git a/lib/libcompiler_rt/int_util.c b/lib/libcompiler_rt/int_util.c index 420d1e237aa..de87410dbca 100644 --- a/lib/libcompiler_rt/int_util.c +++ b/lib/libcompiler_rt/int_util.c @@ -45,6 +45,16 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) { __assert_rtn(function, file, line, "libcompiler_rt abort"); } +#elif __Fuchsia__ + +#ifndef _WIN32 +__attribute__((weak)) +__attribute__((visibility("hidden"))) +#endif +void compilerrt_abort_impl(const char *file, int line, const char *function) { + __builtin_trap(); +} + #else /* Get the system definition of abort() */ diff --git a/lib/libcompiler_rt/lshrdi3.c b/lib/libcompiler_rt/lshrdi3.c index 6b1ea923b77..becbbef4eb0 100644 --- a/lib/libcompiler_rt/lshrdi3.c +++ b/lib/libcompiler_rt/lshrdi3.c @@ -18,8 +18,6 @@ /* Precondition: 0 <= b < bits_in_dword */ -ARM_EABI_FNALIAS(llsr, lshrdi3) - COMPILER_RT_ABI di_int __lshrdi3(di_int a, si_int b) { @@ -41,3 +39,10 @@ __lshrdi3(di_int a, si_int b) } return result.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) { + return __lshrdi3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/mingw_fixfloat.c b/lib/libcompiler_rt/mingw_fixfloat.c new file mode 100644 index 00000000000..c462e0dbf65 --- /dev/null +++ b/lib/libcompiler_rt/mingw_fixfloat.c @@ -0,0 +1,36 @@ +/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +COMPILER_RT_ABI di_int __fixdfdi(double a); +COMPILER_RT_ABI di_int __fixsfdi(float a); +COMPILER_RT_ABI du_int __fixunsdfdi(double a); +COMPILER_RT_ABI du_int __fixunssfdi(float a); +COMPILER_RT_ABI double __floatdidf(di_int a); +COMPILER_RT_ABI float __floatdisf(di_int a); +COMPILER_RT_ABI double __floatundidf(du_int a); +COMPILER_RT_ABI float __floatundisf(du_int a); + +COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); } + +COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); } + +COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); } + +COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); } + +COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); } + +COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); } + +COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); } + +COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); } diff --git a/lib/libcompiler_rt/muldf3.c b/lib/libcompiler_rt/muldf3.c index 1eb733849e5..59a60190eba 100644 --- a/lib/libcompiler_rt/muldf3.c +++ b/lib/libcompiler_rt/muldf3.c @@ -15,8 +15,13 @@ #define DOUBLE_PRECISION #include "fp_mul_impl.inc" -ARM_EABI_FNALIAS(dmul, muldf3) - COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { + return __muldf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/muldi3.c b/lib/libcompiler_rt/muldi3.c index 2dae44c11b9..6818a9e2f72 100644 --- a/lib/libcompiler_rt/muldi3.c +++ b/lib/libcompiler_rt/muldi3.c @@ -40,8 +40,6 @@ __muldsi3(su_int a, su_int b) /* Returns: a * b */ -ARM_EABI_FNALIAS(lmul, muldi3) - COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) { @@ -54,3 +52,10 @@ __muldi3(di_int a, di_int b) r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; return r.all; } + +#if defined(__ARM_EABI__) +AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) { + return __muldi3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/mulsf3.c b/lib/libcompiler_rt/mulsf3.c index 478b3bc0e0e..f141af1acc5 100644 --- a/lib/libcompiler_rt/mulsf3.c +++ b/lib/libcompiler_rt/mulsf3.c @@ -15,8 +15,13 @@ #define SINGLE_PRECISION #include "fp_mul_impl.inc" -ARM_EABI_FNALIAS(fmul, mulsf3) - COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { + return __mulsf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/negdf2.c b/lib/libcompiler_rt/negdf2.c index d634b421cb7..5e2544cdb4b 100644 --- a/lib/libcompiler_rt/negdf2.c +++ b/lib/libcompiler_rt/negdf2.c @@ -14,9 +14,14 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(dneg, negdf2) - COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dneg(fp_t a) { + return __negdf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/negsf2.c b/lib/libcompiler_rt/negsf2.c index 29c17be4145..f90b3433568 100644 --- a/lib/libcompiler_rt/negsf2.c +++ b/lib/libcompiler_rt/negsf2.c @@ -14,9 +14,14 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fneg, negsf2) - COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fneg(fp_t a) { + return __negsf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/ppc/Makefile.mk b/lib/libcompiler_rt/ppc/Makefile.mk deleted file mode 100644 index 0adc623aa04..00000000000 --- a/lib/libcompiler_rt/ppc/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := ppc - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/subdf3.c b/lib/libcompiler_rt/subdf3.c index 7a79e5e7765..38340dfab1a 100644 --- a/lib/libcompiler_rt/subdf3.c +++ b/lib/libcompiler_rt/subdf3.c @@ -15,11 +15,15 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(dsub, subdf3) - // Subtraction; flip the sign bit of b and add. COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) { return __adddf3(a, fromRep(toRep(b) ^ signBit)); } +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) { + return __subdf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/subsf3.c b/lib/libcompiler_rt/subsf3.c index c3b85144af4..34276b1447b 100644 --- a/lib/libcompiler_rt/subsf3.c +++ b/lib/libcompiler_rt/subsf3.c @@ -15,11 +15,15 @@ #define SINGLE_PRECISION #include "fp_lib.h" -ARM_EABI_FNALIAS(fsub, subsf3) - // Subtraction; flip the sign bit of b and add. COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) { return __addsf3(a, fromRep(toRep(b) ^ signBit)); } +#if defined(__ARM_EABI__) +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { + return __subsf3(a, b); +} +#endif + diff --git a/lib/libcompiler_rt/truncdfhf2.c b/lib/libcompiler_rt/truncdfhf2.c index 17195cd9e79..4bb71aa178a 100644 --- a/lib/libcompiler_rt/truncdfhf2.c +++ b/lib/libcompiler_rt/truncdfhf2.c @@ -11,8 +11,13 @@ #define DST_HALF #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(d2h, truncdfhf2) - COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { return __truncXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI uint16_t __aeabi_d2h(double a) { + return __truncdfhf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/truncdfsf2.c b/lib/libcompiler_rt/truncdfsf2.c index 46ec11dccd7..8bf58bb23a3 100644 --- a/lib/libcompiler_rt/truncdfsf2.c +++ b/lib/libcompiler_rt/truncdfsf2.c @@ -11,8 +11,13 @@ #define DST_SINGLE #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(d2f, truncdfsf2) - COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI float __aeabi_d2f(double a) { + return __truncdfsf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/truncsfhf2.c b/lib/libcompiler_rt/truncsfhf2.c index 9d61895bfd8..f6ce1fa1de0 100644 --- a/lib/libcompiler_rt/truncsfhf2.c +++ b/lib/libcompiler_rt/truncsfhf2.c @@ -11,8 +11,6 @@ #define DST_HALF #include "fp_trunc_impl.inc" -ARM_EABI_FNALIAS(f2h, truncsfhf2) - // Use a forwarding definition and noinline to implement a poor man's alias, // as there isn't a good cross-platform way of defining one. COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { @@ -22,3 +20,10 @@ COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); } + +#if defined(__ARM_EABI__) +AEABI_RTABI uint16_t __aeabi_f2h(float a) { + return __truncsfhf2(a); +} +#endif + diff --git a/lib/libcompiler_rt/udivsi3.c b/lib/libcompiler_rt/udivsi3.c index 5d0140cc3e7..8eccf102cc9 100644 --- a/lib/libcompiler_rt/udivsi3.c +++ b/lib/libcompiler_rt/udivsi3.c @@ -18,8 +18,6 @@ /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ -ARM_EABI_FNALIAS(uidiv, udivsi3) - /* This function should not call __divsi3! */ COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) @@ -64,3 +62,10 @@ __udivsi3(su_int n, su_int d) q = (q << 1) | carry; return q; } + +#if defined(__ARM_EABI__) +AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) { + return __udivsi3(n, d); +} +#endif + diff --git a/lib/libcompiler_rt/x86_64/Makefile.mk b/lib/libcompiler_rt/x86_64/Makefile.mk deleted file mode 100644 index 83848dddd96..00000000000 --- a/lib/libcompiler_rt/x86_64/Makefile.mk +++ /dev/null @@ -1,20 +0,0 @@ -#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -ModuleName := builtins -SubDirs := -OnlyArchs := x86_64 x86_64h - -AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file))) -Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) -ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o) -Implementation := Optimized - -# FIXME: use automatic dependencies? -Dependencies := $(wildcard lib/*.h $(Dir)/*.h) diff --git a/lib/libcompiler_rt/x86_64/floatdidf.c b/lib/libcompiler_rt/x86_64/floatdidf.c index 388404e5e08..dead0ed42c6 100644 --- a/lib/libcompiler_rt/x86_64/floatdidf.c +++ b/lib/libcompiler_rt/x86_64/floatdidf.c @@ -4,7 +4,7 @@ /* double __floatdidf(di_int a); */ -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(_M_X64) #include "../int_lib.h" diff --git a/lib/libcompiler_rt/x86_64/floatdisf.c b/lib/libcompiler_rt/x86_64/floatdisf.c index 96c3728e92c..99d5621c632 100644 --- a/lib/libcompiler_rt/x86_64/floatdisf.c +++ b/lib/libcompiler_rt/x86_64/floatdisf.c @@ -2,7 +2,7 @@ * License. See LICENSE.TXT for details. */ -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(_M_X64) #include "../int_lib.h"