"${CMAKE_SOURCE_DIR}/../../cmake/Modules")
include(base-config-ix)
include(CompilerRTUtils)
+
+ load_llvm_config()
+ construct_compiler_rt_default_triple()
+
if(APPLE)
include(CompilerRTDarwinUtils)
endif()
ashlti3.c
ashrdi3.c
ashrti3.c
- clear_cache.c
+ bswapdi2.c
+ bswapsi2.c
clzdi2.c
clzsi2.c
clzti2.c
divti3.c
divtf3.c
divxc3.c
- enable_execute_stack.c
- eprintf.c
extendsfdf2.c
extendhfsf2.c
ffsdi2.c
+ ffssi2.c
ffsti2.c
fixdfdi.c
fixdfsi.c
negvdi2.c
negvsi2.c
negvti2.c
+ os_version_check.c
paritydi2.c
paritysi2.c
parityti2.c
umodsi3.c
umodti3.c)
-if(COMPILER_RT_SUPPORTS_ATOMIC_KEYWORD)
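+# 128-bit floating point (tf_float) support routines, shared below by the
+# aarch64 and 64-bit MIPS targets.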
+set(GENERIC_TF_SOURCES
+ comparetf2.c
+ extenddftf2.c
+ extendsftf2.c
+ fixtfdi.c
+ fixtfsi.c
+ fixtfti.c
+ fixunstfdi.c
+ fixunstfsi.c
+ fixunstfti.c
+ floatditf.c
+ floatsitf.c
+ floattitf.c
+ floatunditf.c
+ floatunsitf.c
+ floatuntitf.c
+ multc3.c
+ trunctfdf2.c
+ trunctfsf2.c)
+
+option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
+ "Skip the atomic builtin (this may be needed if system headers are unavailable)"
+ Off)
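+# For example, a target without usable system headers could be configured
+# with -DCOMPILER_RT_EXCLUDE_ATOMIC_BUILTIN=On on the cmake command line.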
+
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
- atomic.c)
+ emutls.c
+ enable_execute_stack.c
+ eprintf.c)
endif()
-set(MSVC_SOURCES
- divsc3.c
- divdc3.c
- divxc3.c
- mulsc3.c
- muldc3.c
- mulxc3.c)
-
+if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
+ set(GENERIC_SOURCES
+ ${GENERIC_SOURCES}
+ atomic.c)
+endif()
if(APPLE)
set(GENERIC_SOURCES
atomic_thread_fence.c)
endif()
-if(NOT WIN32 OR MINGW)
- set(GENERIC_SOURCES
- ${GENERIC_SOURCES}
- emutls.c)
-endif()
-
if (HAVE_UNWIND_H)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
gcc_personality_v0.c)
endif ()
+if (NOT FUCHSIA)
+ set(GENERIC_SOURCES
+ ${GENERIC_SOURCES}
+ clear_cache.c)
+endif()
+
if (NOT MSVC)
set(x86_64_SOURCES
x86_64/chkstk.S
x86_64/floatdidf.c
x86_64/floatdisf.c
x86_64/floatdixf.c
- ${MSVC_SOURCES})
+ ${GENERIC_SOURCES})
set(x86_64h_SOURCES ${x86_64_SOURCES})
- set(i386_SOURCES ${MSVC_SOURCES})
+ set(i386_SOURCES ${GENERIC_SOURCES})
set(i686_SOURCES ${i386_SOURCES})
endif () # if (NOT MSVC)
set(arm_SOURCES
- arm/adddf3vfp.S
- arm/addsf3vfp.S
+ arm/bswapdi2.S
+ arm/bswapsi2.S
+ arm/clzdi2.S
+ arm/clzsi2.S
+ arm/comparesf2.S
+ arm/divmodsi4.S
+ arm/divsi3.S
+ arm/modsi3.S
+ arm/sync_fetch_and_add_4.S
+ arm/sync_fetch_and_add_8.S
+ arm/sync_fetch_and_and_4.S
+ arm/sync_fetch_and_and_8.S
+ arm/sync_fetch_and_max_4.S
+ arm/sync_fetch_and_max_8.S
+ arm/sync_fetch_and_min_4.S
+ arm/sync_fetch_and_min_8.S
+ arm/sync_fetch_and_nand_4.S
+ arm/sync_fetch_and_nand_8.S
+ arm/sync_fetch_and_or_4.S
+ arm/sync_fetch_and_or_8.S
+ arm/sync_fetch_and_sub_4.S
+ arm/sync_fetch_and_sub_8.S
+ arm/sync_fetch_and_umax_4.S
+ arm/sync_fetch_and_umax_8.S
+ arm/sync_fetch_and_umin_4.S
+ arm/sync_fetch_and_umin_8.S
+ arm/sync_fetch_and_xor_4.S
+ arm/sync_fetch_and_xor_8.S
+ arm/udivmodsi4.S
+ arm/udivsi3.S
+ arm/umodsi3.S
+ ${GENERIC_SOURCES})
+
+set(thumb1_SOURCES
+ arm/divsi3.S
+ arm/udivsi3.S
+ arm/comparesf2.S
+ arm/addsf3.S
+ ${GENERIC_SOURCES})
+
+set(arm_EABI_SOURCES
arm/aeabi_cdcmp.S
arm/aeabi_cdcmpeq_check_nan.c
arm/aeabi_cfcmp.S
arm/aeabi_memmove.S
arm/aeabi_memset.S
arm/aeabi_uidivmod.S
- arm/aeabi_uldivmod.S
- arm/bswapdi2.S
- arm/bswapsi2.S
- arm/clzdi2.S
- arm/clzsi2.S
- arm/comparesf2.S
+ arm/aeabi_uldivmod.S)
+
+set(arm_Thumb1_JT_SOURCES
+ arm/switch16.S
+ arm/switch32.S
+ arm/switch8.S
+ arm/switchu8.S)
+set(arm_Thumb1_SjLj_EH_SOURCES
+ arm/restore_vfp_d8_d15_regs.S
+ arm/save_vfp_d8_d15_regs.S)
+set(arm_Thumb1_VFPv2_SOURCES
+ arm/adddf3vfp.S
+ arm/addsf3vfp.S
arm/divdf3vfp.S
- arm/divmodsi4.S
arm/divsf3vfp.S
- arm/divsi3.S
arm/eqdf2vfp.S
arm/eqsf2vfp.S
arm/extendsfdf2vfp.S
arm/lesf2vfp.S
arm/ltdf2vfp.S
arm/ltsf2vfp.S
- arm/modsi3.S
arm/muldf3vfp.S
arm/mulsf3vfp.S
arm/nedf2vfp.S
arm/negdf2vfp.S
arm/negsf2vfp.S
arm/nesf2vfp.S
- arm/restore_vfp_d8_d15_regs.S
- arm/save_vfp_d8_d15_regs.S
arm/subdf3vfp.S
arm/subsf3vfp.S
- arm/switch16.S
- arm/switch32.S
- arm/switch8.S
- arm/switchu8.S
- arm/sync_fetch_and_add_4.S
- arm/sync_fetch_and_add_8.S
- arm/sync_fetch_and_and_4.S
- arm/sync_fetch_and_and_8.S
- arm/sync_fetch_and_max_4.S
- arm/sync_fetch_and_max_8.S
- arm/sync_fetch_and_min_4.S
- arm/sync_fetch_and_min_8.S
- arm/sync_fetch_and_nand_4.S
- arm/sync_fetch_and_nand_8.S
- arm/sync_fetch_and_or_4.S
- arm/sync_fetch_and_or_8.S
- arm/sync_fetch_and_sub_4.S
- arm/sync_fetch_and_sub_8.S
- arm/sync_fetch_and_umax_4.S
- arm/sync_fetch_and_umax_8.S
- arm/sync_fetch_and_umin_4.S
- arm/sync_fetch_and_umin_8.S
- arm/sync_fetch_and_xor_4.S
- arm/sync_fetch_and_xor_8.S
- arm/sync_synchronize.S
arm/truncdfsf2vfp.S
- arm/udivmodsi4.S
- arm/udivsi3.S
- arm/umodsi3.S
arm/unorddf2vfp.S
- arm/unordsf2vfp.S
- ${GENERIC_SOURCES})
+ arm/unordsf2vfp.S)
+set(arm_Thumb1_icache_SOURCES
+ arm/sync_synchronize.S)
+set(arm_Thumb1_SOURCES
+ ${arm_Thumb1_JT_SOURCES}
+ ${arm_Thumb1_SjLj_EH_SOURCES}
+ ${arm_Thumb1_VFPv2_SOURCES}
+ ${arm_Thumb1_icache_SOURCES})
+
+if(MINGW)
+ set(arm_SOURCES
+ arm/aeabi_idivmod.S
+ arm/aeabi_ldivmod.S
+ arm/aeabi_uidivmod.S
+ arm/aeabi_uldivmod.S
+ divmoddi4.c
+ divmodsi4.c
+ divdi3.c
+ divsi3.c
+ fixdfdi.c
+ fixsfdi.c
+ fixunsdfdi.c
+ fixunssfdi.c
+ floatdidf.c
+ floatdisf.c
+ floatundidf.c
+ floatundisf.c
+ mingw_fixfloat.c
+ moddi3.c
+ udivmoddi4.c
+ udivmodsi4.c
+ udivsi3.c
+ umoddi3.c
+ emutls.c)
+elseif(NOT WIN32)
+ # TODO the EABI sources should only be added to EABI targets
+ set(arm_SOURCES
+ ${arm_SOURCES}
+ ${arm_EABI_SOURCES}
+ ${arm_Thumb1_SOURCES})
+
+ set(thumb1_SOURCES
+ ${thumb1_SOURCES}
+ ${arm_EABI_SOURCES})
+endif()
set(aarch64_SOURCES
- comparetf2.c
- extenddftf2.c
- extendsftf2.c
- fixtfdi.c
- fixtfsi.c
- fixtfti.c
- fixunstfdi.c
- fixunstfsi.c
- fixunstfti.c
- floatditf.c
- floatsitf.c
- floatunditf.c
- floatunsitf.c
- multc3.c
- trunctfdf2.c
- trunctfsf2.c
+ ${GENERIC_TF_SOURCES}
${GENERIC_SOURCES})
set(armhf_SOURCES ${arm_SOURCES})
set(arm64_SOURCES ${aarch64_SOURCES})
# macho_embedded archs
-set(armv6m_SOURCES ${GENERIC_SOURCES})
+set(armv6m_SOURCES ${thumb1_SOURCES})
set(armv7m_SOURCES ${arm_SOURCES})
set(armv7em_SOURCES ${arm_SOURCES})
set(mips_SOURCES ${GENERIC_SOURCES})
set(mipsel_SOURCES ${mips_SOURCES})
-set(mips64_SOURCES ${mips_SOURCES})
-set(mips64el_SOURCES ${mips_SOURCES})
+set(mips64_SOURCES ${GENERIC_TF_SOURCES}
+ ${mips_SOURCES})
+set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
+ ${mips_SOURCES})
set(wasm32_SOURCES ${GENERIC_SOURCES})
set(wasm64_SOURCES ${GENERIC_SOURCES})
add_subdirectory(macho_embedded)
darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS})
else ()
- append_string_if(COMPILER_RT_HAS_STD_C99_FLAG -std=gnu99 maybe_stdc99)
+ set(BUILTIN_CFLAGS "")
+
+ append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS)
+
+ # These flags would normally be added to CMAKE_C_FLAGS by the llvm
+ # cmake step. Add them manually if this is a standalone build.
+ if(COMPILER_RT_STANDALONE_BUILD)
+ append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS)
+ append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS)
+ if(NOT COMPILER_RT_DEBUG)
+ append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS)
+ endif()
+ endif()
+
+ set(BUILTIN_DEFS "")
+
+ append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS)
foreach (arch ${BUILTIN_SUPPORTED_ARCH})
if (CAN_TARGET_${arch})
+ # NOTE: some architectures (e.g. i386) have multiple names. Ensure that
+ # we catch them all.
+ set(_arch ${arch})
+ if("${arch}" STREQUAL "i686")
+ set(_arch "i386|i686")
+ endif()
+
# Filter out generic versions of routines that are re-implemented in
# architecture specific manner. This prevents multiple definitions of the
# same symbols, making the symbol selection non-deterministic.
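# For example, arm_SOURCES contains arm/clzdi2.S, so the generic clzdi2.c
# is removed from the list for that target.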
foreach (_file ${${arch}_SOURCES})
- if (${_file} MATCHES ${arch}/*)
+ if (${_file} MATCHES ${_arch}/*)
get_filename_component(_name ${_file} NAME)
string(REPLACE ".S" ".c" _cname "${_name}")
list(REMOVE_ITEM ${arch}_SOURCES ${_cname})
endif ()
endforeach ()
+ # Needed for clear_cache in debug mode, due to r7's usage in inline asm.
+ # Release mode already sets -fomit-frame-pointer via -O2/-O3; debug mode doesn't.
+ if (${arch} STREQUAL "armhf")
+ list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
+ endif()
+
add_compiler_rt_runtime(clang_rt.builtins
STATIC
ARCHS ${arch}
SOURCES ${${arch}_SOURCES}
- CFLAGS ${maybe_stdc99}
+ DEFS ${BUILTIN_DEFS}
+ CFLAGS ${BUILTIN_CFLAGS}
PARENT_TARGET builtins)
endif ()
endforeach ()
-# $OpenBSD: Makefile,v 1.10 2017/09/08 19:04:00 naddy Exp $
+# $OpenBSD: Makefile,v 1.11 2017/12/26 20:59:44 patrick Exp $
.include <bsd.own.mk>
ashrdi3 \
ashrti3 \
atomic \
+ bswapdi2 \
+ bswapsi2 \
clear_cache \
clzdi2 \
clzsi2 \
extendsfdf2 \
extendhfsf2 \
ffsdi2 \
+ ffssi2 \
ffsti2 \
fixdfdi \
fixdfsi \
fixunstfti.c \
floatditf.c \
floatsitf.c \
- floatunsitf.c \
+ floattitf.c \
floatunditf.c \
+ floatunsitf.c \
+ floatuntitf.c \
multc3.c \
trunctfdf2.c \
trunctfsf2.c
aeabi_memset.S \
aeabi_uidivmod.S \
aeabi_uldivmod.S \
- bswapdi2.S \
- bswapsi2.S \
switch16.S \
switch32.S \
switch8.S \
+++ /dev/null
-#===- lib/builtins/Makefile.mk -----------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-
-# Add arch specific optimized implementations.
-SubDirs += i386 ppc x86_64 arm armv6m
-
-# Add ARM64 dir.
-SubDirs += arm64
-
-# Define the variables for this specific directory.
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o)
-Implementation := Generic
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard $(Dir)/*.h)
si_int __ctzdi2(di_int a); // count trailing zeros
si_int __ctzti2(ti_int a); // count trailing zeros
+si_int __ffssi2(si_int a); // find least significant 1 bit
si_int __ffsdi2(di_int a); // find least significant 1 bit
si_int __ffsti2(ti_int a); // find least significant 1 bit
si_int __popcountdi2(di_int a); // bit population
si_int __popcountti2(ti_int a); // bit population
-uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm only
+uint32_t __bswapsi2(uint32_t a); // a byteswapped
+uint64_t __bswapdi2(uint64_t a); // a byteswapped
// Integral arithmetic
#define DOUBLE_PRECISION
#include "fp_add_impl.inc"
-ARM_EABI_FNALIAS(dadd, adddf3)
-
COMPILER_RT_ABI double __adddf3(double a, double b){
return __addXf3__(a, b);
}
+
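+/* On ARM EABI targets, __aeabi_dadd is the AEABI run-time name for this
+ * routine; it uses the base (soft-float) AAPCS regardless of how __adddf3
+ * itself is built. */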
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_dadd(double a, double b) {
+ return __adddf3(a, b);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_add_impl.inc"
-ARM_EABI_FNALIAS(fadd, addsf3)
-
COMPILER_RT_ABI float __addsf3(float a, float b) {
return __addXf3__(a, b);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_fadd(float a, float b) {
+ return __addsf3(a, b);
+}
+#endif
+
+++ /dev/null
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := armv5 armv6 armv7 armv7k armv7m armv7em armv7s
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
vadd.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__adddf3vfp)
--- /dev/null
+/*===-- addsf3.S - Adds two single precision floating point numbers -----===//
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===//
+ *
+ * This file implements the __addsf3 (single precision floating point number
+ * addition with the IEEE-754 default rounding, to nearest, ties to even)
+ * function for the ARM Thumb1 ISA.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include "../assembly.h"
+#define significandBits 23
+#define typeWidth 32
+
+ .syntax unified
+ .text
+ .thumb
+ .p2align 2
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
+ push {r4, r5, r6, r7, lr}
+ // Get the absolute value of a and b.
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ lsrs r2, r2, #1 /* aAbs */
+ beq LOCAL_LABEL(a_zero_nan_inf)
+ lsrs r3, r3, #1 /* bAbs */
+ beq LOCAL_LABEL(zero_nan_inf)
+
+ // Detect if a or b is infinity or NaN.
+ lsrs r6, r2, #(significandBits)
+ lsrs r7, r3, #(significandBits)
+ cmp r6, #0xFF
+ beq LOCAL_LABEL(zero_nan_inf)
+ cmp r7, #0xFF
+ beq LOCAL_LABEL(zero_nan_inf)
+
+ // Swap Rep and Abs so that a and aAbs hold the larger absolute value.
+ cmp r2, r3
+ bhs LOCAL_LABEL(no_swap)
+ movs r4, r0
+ movs r5, r2
+ movs r0, r1
+ movs r2, r3
+ movs r1, r4
+ movs r3, r5
+LOCAL_LABEL(no_swap):
+
+ // Get the significands and shift them to give us round, guard and sticky.
+ lsls r4, r0, #(typeWidth - significandBits)
+ lsrs r4, r4, #(typeWidth - significandBits - 3) /* aSignificand << 3 */
+ lsls r5, r1, #(typeWidth - significandBits)
+ lsrs r5, r5, #(typeWidth - significandBits - 3) /* bSignificand << 3 */
+
+ // Get the implicitBit.
+ movs r6, #1
+ lsls r6, r6, #(significandBits + 3)
+
+ // Get aExponent and set implicit bit if necessary.
+ lsrs r2, r2, #(significandBits)
+ beq LOCAL_LABEL(a_done_implicit_bit)
+ orrs r4, r6
+LOCAL_LABEL(a_done_implicit_bit):
+
+ // Get bExponent and set implicit bit if necessary.
+ lsrs r3, r3, #(significandBits)
+ beq LOCAL_LABEL(b_done_implicit_bit)
+ orrs r5, r6
+LOCAL_LABEL(b_done_implicit_bit):
+
+ // Get the difference in exponents.
+ subs r6, r2, r3
+ beq LOCAL_LABEL(done_align)
+
+ // If b is denormal, then a must be normal as align > 0, and we only need to
+ // right shift bSignificand by (align - 1) bits.
+ cmp r3, #0
+ bne 1f
+ subs r6, r6, #1
+1:
+
+ // No longer needs bExponent. r3 is dead here.
+ // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
+ movs r3, #(typeWidth)
+ subs r3, r3, r6
+ movs r7, r5
+ lsls r7, r3
+ beq 1f
+ movs r7, #1
+1:
+
+ // bSignificand = bSignificand >> align | sticky;
+ lsrs r5, r6
+ orrs r5, r7
+ bne LOCAL_LABEL(done_align)
+ movs r5, #1 // sticky; b is known to be non-zero.
+
+LOCAL_LABEL(done_align):
+ // isSubtraction = (aRep ^ bRep) >> 31;
+ movs r7, r0
+ eors r7, r1
+ lsrs r7, #31
+ bne LOCAL_LABEL(do_subtraction)
+
+ // Same sign, do Addition.
+
+ // aSignificand += bSignificand;
+ adds r4, r4, r5
+
+ // Check carry bit.
+ movs r6, #1
+ lsls r6, r6, #(significandBits + 3 + 1)
+ movs r7, r4
+ ands r7, r6
+ beq LOCAL_LABEL(form_result)
+ // If the addition carried up, we need to right-shift the result and
+ // adjust the exponent.
+ movs r7, r4
+ movs r6, #1
+ ands r7, r6 // sticky = aSignificand & 1;
+ lsrs r4, #1
+ orrs r4, r7 // result Significand
+ adds r2, #1 // result Exponent
+ // If we have overflowed the type, return +/- infinity.
+ cmp r2, 0xFF
+ beq LOCAL_LABEL(ret_inf)
+
+LOCAL_LABEL(form_result):
+ // Shift the sign, exponent and significand into place.
+ lsrs r0, #(typeWidth - 1)
+ lsls r0, #(typeWidth - 1) // Get Sign.
+ lsls r2, #(significandBits)
+ orrs r0, r2
+ movs r1, r4
+ lsls r4, #(typeWidth - significandBits - 3)
+ lsrs r4, #(typeWidth - significandBits)
+ orrs r0, r4
+
+ // Final rounding. The result may overflow to infinity, but that is the
+ // correct result in that case.
+ // roundGuardSticky = aSignificand & 0x7;
+ movs r2, #0x7
+ ands r1, r2
+ // if (roundGuardSticky > 0x4) result++;
+
+ cmp r1, #0x4
+ blt LOCAL_LABEL(done_round)
+ beq 1f
+ adds r0, #1
+ pop {r4, r5, r6, r7, pc}
+1:
+
+ // if (roundGuardSticky == 0x4) result += result & 1;
+ movs r1, r0
+ lsrs r1, #1
+ bcc LOCAL_LABEL(done_round)
+ adds r0, r0, #1
+LOCAL_LABEL(done_round):
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(do_subtraction):
+ subs r4, r4, r5 // aSignificand -= bSignificand;
+ beq LOCAL_LABEL(ret_zero)
+ movs r6, r4
+ cmp r2, 0
+ beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
+ // If partial cancellation occurred, we need to left-shift the result
+ // and adjust the exponent:
+ lsrs r6, r6, #(significandBits + 3)
+ bne LOCAL_LABEL(form_result)
+
+ push {r0, r1, r2, r3}
+ movs r0, r4
+ bl __clzsi2
+ movs r5, r0
+ pop {r0, r1, r2, r3}
+ // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+ subs r5, r5, #(typeWidth - significandBits - 3 - 1)
+ // aSignificand <<= shift; aExponent -= shift;
+ lsls r4, r5
+ subs r2, r2, r5
+ bgt LOCAL_LABEL(form_result)
+
+ // Do normalization if aExponent <= 0.
+ movs r6, #1
+ subs r6, r6, r2 // 1 - aExponent;
+ movs r2, #0 // aExponent = 0;
+ movs r3, #(typeWidth) // bExponent is dead.
+ subs r3, r3, r6
+ movs r7, r4
+ lsls r7, r3 // stickyBit = (bool)(aSignificand << (typeWidth - align))
+ beq 1f
+ movs r7, #1
+1:
+ lsrs r4, r6 /* aSignificand >> shift */
+ orrs r4, r7
+ b LOCAL_LABEL(form_result)
+
+LOCAL_LABEL(ret_zero):
+ movs r0, #0
+ pop {r4, r5, r6, r7, pc}
+
+
+LOCAL_LABEL(a_zero_nan_inf):
+ lsrs r3, r3, #1
+
+LOCAL_LABEL(zero_nan_inf):
+ // Here r2 has aAbs, r3 has bAbs
+ movs r4, #0xFF
+ lsls r4, r4, #(significandBits) // Make +inf.
+
+ cmp r2, r4
+ bhi LOCAL_LABEL(a_is_nan)
+ cmp r3, r4
+ bhi LOCAL_LABEL(b_is_nan)
+
+ cmp r2, r4
+ bne LOCAL_LABEL(a_is_rational)
+ // aAbs is INF.
+ eors r1, r0 // aRep ^ bRep.
+ movs r6, #1
+ lsls r6, r6, #(typeWidth - 1) // get sign mask.
+ cmp r1, r6 // if they differ only in the sign bit, it's -INF + INF
+ beq LOCAL_LABEL(a_is_nan)
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(a_is_rational):
+ cmp r3, r4
+ bne LOCAL_LABEL(b_is_rational)
+ movs r0, r1
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_rational):
+ // Either a or b, or both, are zero.
+ adds r4, r2, r3
+ beq LOCAL_LABEL(both_zero)
+ cmp r2, #0 // is absA 0?
+ beq LOCAL_LABEL(ret_b)
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(both_zero):
+ ands r0, r1 // +0 + -0 = +0
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_b):
+ movs r0, r1
+
+LOCAL_LABEL(ret):
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(b_is_nan):
+ movs r0, r1
+LOCAL_LABEL(a_is_nan):
+ movs r1, #1
+ lsls r1, r1, #(significandBits - 1) // r1 is the quiet bit.
+ orrs r0, r1
+ pop {r4, r5, r6, r7, pc}
+
+LOCAL_LABEL(ret_inf):
+ movs r4, #0xFF
+ lsls r4, r4, #(significandBits)
+ orrs r0, r4
+ lsrs r0, r0, #(significandBits)
+ lsls r0, r0, #(significandBits)
+ pop {r4, r5, r6, r7, pc}
+
+
+END_COMPILERRT_FUNCTION(__addsf3)
+
+NO_EXEC_STACK_DIRECTIVE
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vadd.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vadd.f32 s14, s14, s15
vmov r0, s14 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__addsf3vfp)
push {r0-r3, lr}
bl __aeabi_cdcmpeq_check_nan
cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+ beq 1f
+ // NaN has been ruled out, so __aeabi_cdcmple can't trap
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_cdcmple
+ pop {r0-r3, pc}
+1:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
pop {r0-r3, lr}
// NaN has been ruled out, so __aeabi_cdcmple can't trap
bne __aeabi_cdcmple
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ mov ip, #APSR_C
+ msr APSR_nzcvq, ip
+#else
msr CPSR_f, #APSR_C
+#endif
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
bl __aeabi_dcmplt
cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+ bne 1f
+ // Z = 0, C = 0
+ movs r0, #1
+ lsls r0, r0, #1
+ pop {r0-r3, pc}
+1:
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_dcmpeq
+ cmp r0, #1
+ bne 2f
+ // Z = 1, C = 1
+ movs r0, #2
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+2:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ ITT(eq)
moveq ip, #0
beq 1f
ldm sp, {r0-r3}
bl __aeabi_dcmpeq
cmp r0, #1
+ ITE(eq)
moveq ip, #(APSR_C | APSR_Z)
movne ip, #(APSR_C)
1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ msr APSR_nzcvq, ip
+#else
msr CPSR_f, ip
+#endif
pop {r0-r3}
POP_PC()
+#endif
END_COMPILERRT_FUNCTION(__aeabi_cdcmple)
// int __aeabi_cdrcmple(double a, double b) {
//===----------------------------------------------------------------------===//
#include <stdint.h>
+#include "../int_lib.h"
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
int __aeabi_cdcmpeq_check_nan(double a, double b) {
return __builtin_isnan(a) || __builtin_isnan(b);
}
push {r0-r3, lr}
bl __aeabi_cfcmpeq_check_nan
cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+ beq 1f
+ // NaN has been ruled out, so __aeabi_cfcmple can't trap
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_cfcmple
+ pop {r0-r3, pc}
+1:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
pop {r0-r3, lr}
// NaN has been ruled out, so __aeabi_cfcmple can't trap
bne __aeabi_cfcmple
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ mov ip, #APSR_C
+ msr APSR_nzcvq, ip
+#else
msr CPSR_f, #APSR_C
+#endif
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
bl __aeabi_fcmplt
cmp r0, #1
+#if __ARM_ARCH_ISA_THUMB == 1
+ bne 1f
+ // Z = 0, C = 0
+ movs r0, #1
+ lsls r0, r0, #1
+ pop {r0-r3, pc}
+1:
+ mov r0, sp
+ ldm r0, {r0-r3}
+ bl __aeabi_fcmpeq
+ cmp r0, #1
+ bne 2f
+ // Z = 1, C = 1
+ movs r0, #2
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+2:
+ // Z = 0, C = 1
+ movs r0, #0xF
+ lsls r0, r0, #31
+ pop {r0-r3, pc}
+#else
+ ITT(eq)
moveq ip, #0
beq 1f
ldm sp, {r0-r3}
bl __aeabi_fcmpeq
cmp r0, #1
+ ITE(eq)
moveq ip, #(APSR_C | APSR_Z)
movne ip, #(APSR_C)
1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ msr APSR_nzcvq, ip
+#else
msr CPSR_f, ip
+#endif
pop {r0-r3}
POP_PC()
+#endif
END_COMPILERRT_FUNCTION(__aeabi_cfcmple)
// int __aeabi_cfrcmple(float a, float b) {
//===----------------------------------------------------------------------===//
#include <stdint.h>
+#include "../int_lib.h"
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
int __aeabi_cfcmpeq_check_nan(float a, float b) {
return __builtin_isnan(a) || __builtin_isnan(b);
}
// }
// }
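+// The __aeabi_dcmp* helpers receive their arguments in core registers per
+// the base AAPCS, but on a hard-float build the __*df2 comparisons expect
+// them in d0/d1, so move them across first.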
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \
+ vmov d0, r0, r1 SEPARATOR \
+ vmov d1, r2, r3
+#else
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
#define DEFINE_AEABI_DCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
- mov r0, #0 SEPARATOR \
+ movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
- mov r0, #1 SEPARATOR \
+ movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
* line.
*/
+#include "../int_lib.h"
+
/* provide an unused declaration to pacify pedantic compilation */
extern unsigned char declaration;
#if defined(__ARM_EABI__)
-int __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
__aeabi_idiv0(int return_value) {
return return_value;
}
-long long __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
__aeabi_ldiv0(long long return_value) {
return return_value;
}
#define DOUBLE_PRECISION
#include "../fp_lib.h"
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
__aeabi_dsub(fp_t, fp_t);
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
__aeabi_drsub(fp_t a, fp_t b) {
return __aeabi_dsub(b, a);
}
// }
// }
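+// As with the dcmp helpers above, hard-float builds must first move the
+// base-AAPCS core-register arguments into s0/s1 for the __*sf2 comparisons.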
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \
+ vmov s0, r0 SEPARATOR \
+ vmov s1, r1
+#else
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
#define DEFINE_AEABI_FCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
- mov r0, #0 SEPARATOR \
+ movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
- mov r0, #1 SEPARATOR \
+ movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
#define SINGLE_PRECISION
#include "../fp_lib.h"
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
__aeabi_fsub(fp_t, fp_t);
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
__aeabi_frsub(fp_t a, fp_t b) {
return __aeabi_fsub(b, a);
}
// return {quot, rem};
// }
+#if defined(__MINGW32__)
+#define __aeabi_idivmod __rt_sdiv
+#endif
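+// Note: __rt_sdiv takes the divisor in r0 and the dividend in r1, the
+// reverse of __aeabi_idivmod; the other __rt_*div helpers below are laid
+// out the same way, hence the MinGW argument swaps.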
+
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__divsi3)
+ pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+#else
push { lr }
sub sp, sp, #4
mov r2, sp
+#if defined(__MINGW32__)
+ mov r3, r0
+ mov r0, r1
+ mov r1, r3
+#endif
bl SYMBOL_NAME(__divmodsi4)
ldr r1, [sp]
add sp, sp, #4
pop { pc }
+#endif // __ARM_ARCH_ISA_THUMB == 1
END_COMPILERRT_FUNCTION(__aeabi_idivmod)
NO_EXEC_STACK_DIRECTIVE
// return {quot, rem};
// }
+#if defined(__MINGW32__)
+#define __aeabi_ldivmod __rt_sdiv64
+#endif
+
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
- push {r11, lr}
+ push {r6, lr}
sub sp, sp, #16
- add r12, sp, #8
- str r12, [sp]
+ add r6, sp, #8
+ str r6, [sp]
+#if defined(__MINGW32__)
+ movs r6, r0
+ movs r0, r2
+ movs r2, r6
+ movs r6, r1
+ movs r1, r3
+ movs r3, r6
+#endif
bl SYMBOL_NAME(__divmoddi4)
ldr r2, [sp, #8]
ldr r3, [sp, #12]
add sp, sp, #16
- pop {r11, pc}
+ pop {r6, pc}
END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
NO_EXEC_STACK_DIRECTIVE
DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
mov r2, r1
- mov r1, #0
+ movs r1, #0
b memset
END_COMPILERRT_FUNCTION(__aeabi_memclr)
// return {quot, rem};
// }
+#if defined(__MINGW32__)
+#define __aeabi_uidivmod __rt_udiv
+#endif
+
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+#if __ARM_ARCH_ISA_THUMB == 1
+ cmp r0, r1
+ bcc LOCAL_LABEL(case_denom_larger)
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__aeabi_uidiv)
+ pop {r1, r2, r3}
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+LOCAL_LABEL(case_denom_larger):
+ movs r1, r0
+ movs r0, #0
+ JMP (lr)
+#else
push { lr }
sub sp, sp, #4
mov r2, sp
+#if defined(__MINGW32__)
+ mov r3, r0
+ mov r0, r1
+ mov r1, r3
+#endif
bl SYMBOL_NAME(__udivmodsi4)
ldr r1, [sp]
add sp, sp, #4
pop { pc }
+#endif
END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
NO_EXEC_STACK_DIRECTIVE
// return {quot, rem};
// }
+#if defined(__MINGW32__)
+#define __aeabi_uldivmod __rt_udiv64
+#endif
+
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
- push {r11, lr}
+ push {r6, lr}
sub sp, sp, #16
- add r12, sp, #8
- str r12, [sp]
+ add r6, sp, #8
+ str r6, [sp]
+#if defined(__MINGW32__)
+ movs r6, r0
+ movs r0, r2
+ movs r2, r6
+ movs r6, r1
+ movs r1, r3
+ movs r3, r6
+#endif
bl SYMBOL_NAME(__udivmoddi4)
ldr r2, [sp, #8]
ldr r3, [sp, #12]
add sp, sp, #16
- pop {r11, pc}
+ pop {r6, pc}
END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
NO_EXEC_STACK_DIRECTIVE
#include "../assembly.h"
.syntax unified
+#if __ARM_ARCH_ISA_THUMB == 2
+.thumb
+#endif
-.p2align 2
+@ int __eqsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
// Make copies of a and b with the sign bit shifted off the top. These will
// be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+#else
mov r2, r0, lsl #1
mov r3, r1, lsl #1
+#endif
// We do the comparison in three stages (ignoring NaN values for the time
// being). First, we orr the absolute values of a and b; this sets the Z
// flag if both a and b are zero (of either sign). The shift of r3 doesn't
// affect this at all, but it *does* make sure that the C flag is clear for
// the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+ lsrs r6, r3, #1
+ orrs r6, r2
+#else
orrs r12, r2, r3, lsr #1
-
+#endif
// Next, we check if a and b have the same or different signs. If they have
// opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+#else
it ne
eorsne r12, r0, r1
+#endif
// If a and b are equal (either both zeros or bit identical; again, we're
// ignoring NaNs for now), this subtract will zero out r0. If they have the
// same sign, the flags are updated as they would be for a comparison of the
// absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+ bmi 1f
+ subs r0, r2, r3
+1:
+#else
it pl
subspl r0, r2, r3
+#endif
// If a is smaller in magnitude than b and both have the same sign (this
// instruction is also reached when the signs differ), place the negation
// of the sign of b in r0. Thus, if b is positive, -1 is placed in r0
// (a < b); if b is negative, 0 is placed in r0 (a > b, bumped to 1 below).
+#if __ARM_ARCH_ISA_THUMB == 1
+ bhs 1f
+ // Here a and b have the same sign and absA < absB; the result is thus
+ // b < 0 ? 1 : -1. The same holds if a and b have opposite signs (ignoring NaN).
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN)
+1:
+#else
it lo
mvnlo r0, r1, asr #31
+#endif
// If a is greater in magnitude than b and both have the same sign, place
// the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
// in r0, which is the desired result. Conversely, if both are positive
// and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+ bls 1f
+ // Here both have the same sign and absA > absB.
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+1:
+#else
it hi
movhi r0, r1, asr #31
+#endif
// If you've been keeping track, at this point r0 contains -1 if a < b and
// 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
// If a == b, then the Z flag is set, so we can get the correct final value
// into r0 by simply or'ing with 1 if Z is clear.
+ // For Thumb-1, r0 contains -1 if a < b, 1 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
it ne
orrne r0, r0, #1
+#endif
// Finally, we need to deal with NaNs. If either argument is NaN, replace
// the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 1f
+ cmp r3, r6
+1:
+ bls 2f
+ movs r0, #1
+2:
+ pop {r6, pc}
+#else
cmp r2, #0xff000000
ite ls
cmpls r3, #0xff000000
movhi r0, #1
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__eqsf2)
+
DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
-.p2align 2
+@ int __gtsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2)
// Identical to the preceding except that we return -1 for NaN values.
- // Given that the two paths share so much code, one might be tempted to
+ // Given that the two paths share so much code, one might be tempted to
// unify them; however, the extra code needed to do so makes the code size
// to performance tradeoff very hard to justify for such small functions.
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
+#if __ARM_ARCH_ISA_THUMB == 1
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ lsrs r6, r3, #1
+ orrs r6, r2
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+ bmi 2f
+ subs r0, r2, r3
+2:
+ bhs 3f
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN_2)
+3:
+ bls 4f
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 5f
+ cmp r3, r6
+5:
+ bls 6f
+ movs r0, #1
+ negs r0, r0
+6:
+ pop {r6, pc}
+#else
mov r2, r0, lsl #1
mov r3, r1, lsl #1
orrs r12, r2, r3, lsr #1
cmpls r3, #0xff000000
movhi r0, #-1
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__gtsf2)
+
DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
-.p2align 2
+@ int __unordsf2(float a, float b)
+
+ .p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov r0, s0
+ vmov r1, s1
+#endif
// Return 1 for NaN values, 0 otherwise.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
- mov r0, #0
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ movs r0, #0
+#if __ARM_ARCH_ISA_THUMB == 1
+ movs r1, #0xff
+ lsls r1, #24
+ cmp r2, r1
+ bhi 1f
+ cmp r3, r1
+1:
+ bls 2f
+ movs r0, #1
+2:
+#else
cmp r2, #0xff000000
ite ls
cmpls r3, #0xff000000
movhi r0, #1
+#endif
JMP(lr)
END_COMPILERRT_FUNCTION(__unordsf2)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+ vmov s0, r0
+ vmov s1, r1
+ b SYMBOL_NAME(__unordsf2)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+#else
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
+#endif
NO_EXEC_STACK_DIRECTIVE
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vdiv.f64 d5, d6, d7
+ vdiv.f64 d5, d6, d7
vmov r0, r1, d5 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__divdf3vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vdiv.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vdiv.f32 s13, s14, s15
vmov r0, s13 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__divsf3vfp)
#else
ESTABLISH_FRAME
// Set aside the sign of the quotient.
+# if __ARM_ARCH_ISA_THUMB == 1
+ movs r4, r0
+ eors r4, r1
+# else
eor r4, r0, r1
+# endif
// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+# if __ARM_ARCH_ISA_THUMB == 1
+ asrs r2, r0, #31
+ asrs r3, r1, #31
+ eors r0, r2
+ eors r1, r3
+ subs r0, r0, r2
+ subs r1, r1, r3
+# else
eor r2, r0, r0, asr #31
eor r3, r1, r1, asr #31
sub r0, r2, r0, asr #31
sub r1, r3, r1, asr #31
+# endif
// abs(a) / abs(b)
bl SYMBOL_NAME(__udivsi3)
// Apply sign of quotient to result and return.
+# if __ARM_ARCH_ISA_THUMB == 1
+ asrs r4, #31
+ eors r0, r4
+ subs r0, r0, r4
+# else
eor r0, r0, r4, asr #31
sub r0, r0, r4, asr #31
+# endif
CLEAR_FRAME_AND_RETURN
#endif
END_COMPILERRT_FUNCTION(__divsi3)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(eq)
moveq r0, #1 // set result register to 1 if equal
movne r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(eq)
moveq r0, #1 // set result register to 1 if equal
movne r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f64.f32 d0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.f64.f32 d7, s15 // convert single to double
vmov r0, r1, d7 // return result in r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f64 s0, d0
+ vmov r0, s0
+#else
vmov d7, r0, r1 // load double register from R0/R1
vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixdfsivfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.s32.f32 s0, s0
+ vmov r0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixsfsivfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f64 s0, d0
+ vmov r0, s0
+#else
vmov d7, r0, r1 // load double register from R0/R1
vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.u32.f32 s0, s0
+ vmov r0, s0
+#else
vmov s15, r0 // load float register from R0
vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__fixunssfsivfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.s32 d0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7
vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatsidfvfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.s32 s0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatsisfvfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f64.u32 d0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7
vmov r0, r1, d7 // move d7 to result register pair r0/r1
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatunssidfvfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmov s0, r0
+ vcvt.f32.u32 s0, s0
+#else
vmov s15, r0 // move int to float register s15
vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15
vmov r0, s15 // move s15 to result register
+#endif
bx lr
END_COMPILERRT_FUNCTION(__floatunssisfvfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ge)
movge r0, #1 // set result register to 1 if greater than or equal
movlt r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ge)
movge r0, #1 // set result register to 1 if greater than or equal
movlt r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(gt)
movgt r0, #1 // set result register to 1 if greater than
movle r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(gt)
movgt r0, #1 // set result register to 1 if greater than
movle r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ls)
movls r0, #1 // set result register to 1 if less than or equal
movhi r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ls)
movls r0, #1 // set result register to 1 if less than or equal
movhi r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(mi)
movmi r0, #1 // set result register to 1 if less than
movpl r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(mi)
movmi r0, #1 // set result register to 1 if less than
movpl r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
- vmul.f64 d6, d6, d7
+ vmul.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__muldf3vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vmul.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vmul.f32 s13, s14, s15
+#endif
vmov r0, s13 // move result back to r0
bx lr
END_COMPILERRT_FUNCTION(__mulsf3vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ne)
movne r0, #1 // set result register to 1 if unequal
moveq r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f64 d0, d0
+#else
eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__negdf2vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vneg.f32 s0, s0
+#else
eor r0, r0, #-2147483648 // flip sign bit on float in r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__negsf2vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(ne)
movne r0, #1 // set result register to 1 if unequal
moveq r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f64 d0, d0, d1
+#else
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
vsub.f64 d6, d6, d7
vmov r0, r1, d6 // move result back to r0/r1 pair
+#endif
bx lr
END_COMPILERRT_FUNCTION(__subdf3vfp)
//
// extern float __subsf3vfp(float a, float b);
//
-// Returns the difference between two single precision floating point numbers
+// Returns the difference between two single precision floating point numbers
// using the Darwin calling convention where single arguments are passed
// like 32-bit ints.
//
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vsub.f32 s0, s0, s1
+#else
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register
vsub.f32 s14, s14, s15
vmov r0, s14 // move result back to r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__subsf3vfp)
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcvt.f32.f64 s0, d0
+#else
vmov d7, r0, r1 // load double from r0/r1 pair
vcvt.f32.f64 s15, d7 // convert double to single (truncating precision)
vmov r0, s15 // return result in r0
+#endif
bx lr
END_COMPILERRT_FUNCTION(__truncdfsf2vfp)
beq LOCAL_LABEL(divby0)
udiv r0, r0, r1
bx lr
-#else
+
+LOCAL_LABEL(divby0):
+ mov r0, #0
+# ifdef __ARM_EABI__
+ b __aeabi_idiv0
+# else
+ JMP(lr)
+# endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
cmp r1, #1
bcc LOCAL_LABEL(divby0)
+#if __ARM_ARCH_ISA_THUMB == 1
+ bne LOCAL_LABEL(num_neq_denom)
+ JMP(lr)
+LOCAL_LABEL(num_neq_denom):
+#else
IT(eq)
JMPc(lr, eq)
+#endif
cmp r0, r1
+#if __ARM_ARCH_ISA_THUMB == 1
+ bhs LOCAL_LABEL(num_ge_denom)
+ movs r0, #0
+ JMP(lr)
+LOCAL_LABEL(num_ge_denom):
+#else
ITT(cc)
movcc r0, #0
JMPc(lr, cc)
+#endif
+
/*
* Implement division using binary long division algorithm.
*
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
*/
-# ifdef __ARM_FEATURE_CLZ
+# if defined(__ARM_FEATURE_CLZ)
clz ip, r0
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
-# else
+# else /* No CLZ Feature */
# if __ARM_ARCH_ISA_THUMB == 2
# error THUMB mode requires CLZ or UDIV
# endif
+# if __ARM_ARCH_ISA_THUMB == 1
+# define BLOCK_SIZE 10
+# else
+# define BLOCK_SIZE 12
+# endif
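+ /* BLOCK_SIZE is the size in bytes of one expanded block() macro below;
+ the computed branch skips whole blocks, so its offsets must be exact
+ multiples of it. */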
+
mov r2, r0
+# if __ARM_ARCH_ISA_THUMB == 1
+ mov ip, r0
+ adr r0, LOCAL_LABEL(div0block)
+ adds r0, #1
+# else
adr ip, LOCAL_LABEL(div0block)
-
- lsr r3, r2, #16
+# endif
+ lsrs r3, r2, #16
cmp r3, r1
+# if __ARM_ARCH_ISA_THUMB == 1
+ blo LOCAL_LABEL(skip_16)
+ movs r2, r3
+ subs r0, r0, #(16 * BLOCK_SIZE)
+LOCAL_LABEL(skip_16):
+# else
movhs r2, r3
- subhs ip, ip, #(16 * 12)
+ subhs ip, ip, #(16 * BLOCK_SIZE)
+# endif
- lsr r3, r2, #8
+ lsrs r3, r2, #8
cmp r3, r1
+# if __ARM_ARCH_ISA_THUMB == 1
+ blo LOCAL_LABEL(skip_8)
+ movs r2, r3
+ subs r0, r0, #(8 * BLOCK_SIZE)
+LOCAL_LABEL(skip_8):
+# else
movhs r2, r3
- subhs ip, ip, #(8 * 12)
+ subhs ip, ip, #(8 * BLOCK_SIZE)
+# endif
- lsr r3, r2, #4
+ lsrs r3, r2, #4
cmp r3, r1
+# if __ARM_ARCH_ISA_THUMB == 1
+ blo LOCAL_LABEL(skip_4)
+ movs r2, r3
+ subs r0, r0, #(4 * BLOCK_SIZE)
+LOCAL_LABEL(skip_4):
+# else
movhs r2, r3
- subhs ip, #(4 * 12)
+ subhs ip, #(4 * BLOCK_SIZE)
+# endif
- lsr r3, r2, #2
+ lsrs r3, r2, #2
cmp r3, r1
+# if __ARM_ARCH_ISA_THUMB == 1
+ blo LOCAL_LABEL(skip_2)
+ movs r2, r3
+ subs r0, r0, #(2 * BLOCK_SIZE)
+LOCAL_LABEL(skip_2):
+# else
movhs r2, r3
- subhs ip, ip, #(2 * 12)
+ subhs ip, ip, #(2 * BLOCK_SIZE)
+# endif
/* Last block, no need to update r2 or r3. */
+# if __ARM_ARCH_ISA_THUMB == 1
+ lsrs r3, r2, #1
+ cmp r3, r1
+ blo LOCAL_LABEL(skip_1)
+ subs r0, r0, #(1 * BLOCK_SIZE)
+LOCAL_LABEL(skip_1):
+ movs r2, r0
+ mov r0, ip
+ movs r3, #0
+ JMP (r2)
+
+# else
cmp r1, r2, lsr #1
- subls ip, ip, #(1 * 12)
+ subls ip, ip, #(1 * BLOCK_SIZE)
- mov r3, #0
+ movs r3, #0
JMP(ip)
-# endif
+# endif
+# endif /* __ARM_FEATURE_CLZ */
+
#define IMM #
+ /* Due to the limited branch range in Thumb1, we have to place the
+ divide-by-zero block closer to its branches. */
+LOCAL_LABEL(divby0):
+ movs r0, #0
+# if defined(__ARM_EABI__)
+ push {r7, lr}
+ bl __aeabi_idiv0 // due to relocation limit, can't use b.
+ pop {r7, pc}
+# else
+ JMP(lr)
+# endif
+
+#if __ARM_ARCH_ISA_THUMB == 1
+#define block(shift) \
+ lsls r2, r1, IMM shift; \
+ cmp r0, r2; \
+ blo LOCAL_LABEL(block_skip_##shift); \
+ subs r0, r0, r2; \
+ LOCAL_LABEL(block_skip_##shift) :; \
+ adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
+
+ /* TODO: if the current location counter is not word aligned, we don't
+ need the .p2align and nop */
+ /* Label div0block must be word-aligned; align the first block (31) so the last one lands on a word boundary. */
+ .p2align 2
+ nop /* Padding to align div0block as 31 blocks = 310 bytes */
+
+#else
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
ITT(hs); \
WIDE(addhs) r3, r3, IMM (1 << shift); \
WIDE(subhs) r0, r0, r1, lsl IMM shift
+#endif
block(31)
block(30)
JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */
-LOCAL_LABEL(divby0):
- mov r0, #0
-#ifdef __ARM_EABI__
- b __aeabi_idiv0
-#else
- JMP(lr)
-#endif
-
END_COMPILERRT_FUNCTION(__udivsi3)
NO_EXEC_STACK_DIRECTIVE
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f64 d0, d1
+#else
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register
- vcmp.f64 d6, d7
+ vcmp.f64 d6, d7
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(vs)
movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
movvc r0, #0
bx lr
.syntax unified
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+ vcmp.f32 s0, s1
+#else
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register
vcmp.f32 s14, s15
+#endif
vmrs apsr_nzcv, fpscr
+ ITE(vs)
movvs r0, #1 // set result register to 1 if "overflow" (any NaNs)
movvc r0, #0
bx lr
+++ /dev/null
-#===- lib/builtins/arm64/Makefile.mk -----------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := arm64
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
+++ /dev/null
-#===- lib/builtins/arm/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := armv6m
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
/* Precondition: 0 <= b < bits_in_dword */
-ARM_EABI_FNALIAS(llsl, ashldi3)
-
COMPILER_RT_ABI di_int
__ashldi3(di_int a, si_int b)
{
}
return result.all;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) {
+ return __ashldi3(a, b);
+}
+#endif
+
/* Precondition: 0 <= b < bits_in_dword */
-ARM_EABI_FNALIAS(lasr, ashrdi3)
-
COMPILER_RT_ABI di_int
__ashrdi3(di_int a, si_int b)
{
}
return result.all;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) {
+ return __ashrdi3(a, b);
+}
+#endif
+
#endif
#define CONST_SECTION .section .rodata
-#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+ defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
#else
#define NO_EXEC_STACK_DIRECTIVE
#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
#define ARM_HAS_BX
#endif
-#if !defined(__ARM_FEATURE_CLZ) && \
+#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 && \
(__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
#define __ARM_FEATURE_CLZ
#endif
#if __ARM_ARCH_ISA_THUMB == 2
#define IT(cond) it cond
#define ITT(cond) itt cond
+#define ITE(cond) ite cond
#else
#define IT(cond)
#define ITT(cond)
+#define ITE(cond)
#endif
#if __ARM_ARCH_ISA_THUMB == 2
#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
.globl SYMBOL_NAME(name) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \
.set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
#if defined(__ARM_EABI__)
// Where the size is known at compile time, the compiler may emit calls to
// specialised versions of the above functions.
////////////////////////////////////////////////////////////////////////////////
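+// 32-bit targets generally lack __uint128_t, so the 16-byte case is only
+// provided when the compiler defines __SIZEOF_INT128__.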
+#ifdef __SIZEOF_INT128__
#define OPTIMISED_CASES\
OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\
- /* FIXME: __uint128_t isn't available on 32 bit platforms.
- OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)*/\
+ OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define OPTIMISED_CASES\
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
#define OPTIMISED_CASE(n, lockfree, type)\
type __atomic_load_##n(type *src, int model) {\
--- /dev/null
+/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
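+/* Reverses the byte order of a 64-bit word; for example,
+ * __bswapdi2(0x0102030405060708ULL) returns 0x0807060504030201ULL. */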
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+ return (
+ (((u)&0xff00000000000000ULL) >> 56) |
+ (((u)&0x00ff000000000000ULL) >> 40) |
+ (((u)&0x0000ff0000000000ULL) >> 24) |
+ (((u)&0x000000ff00000000ULL) >> 8) |
+ (((u)&0x00000000ff000000ULL) << 8) |
+ (((u)&0x0000000000ff0000ULL) << 24) |
+ (((u)&0x000000000000ff00ULL) << 40) |
+ (((u)&0x00000000000000ffULL) << 56));
+}
--- /dev/null
+/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
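+/* Reverses the byte order of a 32-bit word; for example,
+ * __bswapsi2(0x11223344) returns 0x44332211. */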
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+ return (
+ (((u)&0xff000000) >> 24) |
+ (((u)&0x00ff0000) >> 8) |
+ (((u)&0x0000ff00) << 8) |
+ (((u)&0x000000ff) << 24));
+}
#endif
#endif
-#if defined(__linux__) && defined(__arm__)
- #include <asm/unistd.h>
-#endif
-
/*
* The compiler generates calls to __clear_cache() when creating
* trampoline functions on the stack for use with nested functions.
*/
void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
/*
* Intel processors have a unified instruction and data cache
* so there is nothing to do
sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(__linux__)
+ /*
+ * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+ * it also brought in many other unused defines, as well as a dependency on
+ * having kernel headers installed.
+ *
+ * This value has been stable at least since Linux 3.13 and should remain so
+ * for compatibility reasons, warranting its redefinition here.
+ */
+ #define __ARM_NR_cacheflush 0x0f0002
register int start_reg __asm("r0") = (int) (intptr_t) start;
const register int end_reg __asm("r1") = (int) (intptr_t) end;
+ const register int flags __asm("r2") = 0;
const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
__asm __volatile("svc 0x0"
: "=r"(start_reg)
- : "r"(syscall_nr), "r"(start_reg), "r"(end_reg));
+ : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
+ "r"(flags));
if (start_reg != 0) {
compilerrt_abort();
}
}
}
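/* Editor's sketch (not part of the patch): the inline assembly above is a
 * libc-free form of roughly the following, assuming glibc's syscall(2)
 * wrapper is available: */
#include <unistd.h> /* syscall */
#include <stdlib.h> /* abort */
#define ARM_NR_cacheflush_sketch 0x0f0002 /* same value as redefined above */
static void clear_cache_via_syscall(void *start, void *end) {
  /* arguments mirror the registers above: r0 = start, r1 = end, r2 = flags(0) */
  if (syscall(ARM_NR_cacheflush_sketch, start, end, 0) != 0)
    abort();
}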
-ARM_EABI_FNALIAS(dcmpun, unorddf2)
-
COMPILER_RT_ABI int
__unorddf2(fp_t a, fp_t b) {
const rep_t aAbs = toRep(a) & absMask;
return __gedf2(a, b);
}
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) {
+ return __unorddf2(a, b);
+}
+#endif
+
}
}
-ARM_EABI_FNALIAS(fcmpun, unordsf2)
-
COMPILER_RT_ABI int
__unordsf2(fp_t a, fp_t b) {
const rep_t aAbs = toRep(a) & absMask;
__gtsf2(fp_t a, fp_t b) {
return __gesf2(a, b);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) {
+ return __unordsf2(a, b);
+}
+#endif
+
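/* Editor's note on the recurring pattern above (and throughout this patch):
 * the linker-level ARM_EABI_FNALIAS alias is replaced by a real out-of-line
 * wrapper, so each __aeabi_* entry point carries the AAPCS calling
 * convention explicitly (via AEABI_RTABI) even when the underlying builtin
 * is compiled for a hard-float ABI, where a plain alias would inherit the
 * wrong convention. */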
#include <intrin.h>
#endif
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
enum VendorSignatures {
SIG_INTEL = 0x756e6547 /* Genu */,
SIG_AMD = 0x68747541 /* Auth */
};
enum ProcessorTypes {
- INTEL_ATOM = 1,
+ INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
- INTEL_i386,
- INTEL_i486,
- INTEL_PENTIUM,
- INTEL_PENTIUM_PRO,
- INTEL_PENTIUM_II,
- INTEL_PENTIUM_III,
- INTEL_PENTIUM_IV,
- INTEL_PENTIUM_M,
- INTEL_CORE_DUO,
- INTEL_XEONPHI,
- INTEL_X86_64,
- INTEL_NOCONA,
- INTEL_PRESCOTT,
- AMD_i486,
- AMDPENTIUM,
- AMDATHLON,
- AMDFAM14H,
- AMDFAM16H,
+ INTEL_SILVERMONT,
+ INTEL_KNL,
+ AMD_BTVER1,
+ AMD_BTVER2,
+ AMDFAM17H,
CPU_TYPE_MAX
};
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
- INTEL_PENTIUM_MMX,
- INTEL_CORE2_65,
- INTEL_CORE2_45,
+ AMDFAM15H_BDVER3,
+ AMDFAM15H_BDVER4,
+ AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
- INTEL_ATOM_BONNELL,
- INTEL_ATOM_SILVERMONT,
- INTEL_KNIGHTS_LANDING,
- AMDPENTIUM_K6,
- AMDPENTIUM_K62,
- AMDPENTIUM_K63,
- AMDPENTIUM_GEODE,
- AMDATHLON_TBIRD,
- AMDATHLON_MP,
- AMDATHLON_XP,
- AMDATHLON_K8SSE3,
- AMDATHLON_OPTERON,
- AMDATHLON_FX,
- AMDATHLON_64,
- AMD_BTVER1,
- AMD_BTVER2,
- AMDFAM15H_BDVER3,
- AMDFAM15H_BDVER4,
CPU_SUBTYPE_MAX
};
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
- FEATURE_AVX512,
- FEATURE_AVX512SAVE,
- FEATURE_MOVBE,
- FEATURE_ADX,
- FEATURE_EM64T
+ FEATURE_SSE4_A,
+ FEATURE_FMA4,
+ FEATURE_XOP,
+ FEATURE_FMA,
+ FEATURE_AVX512F,
+ FEATURE_BMI,
+ FEATURE_BMI2,
+ FEATURE_AES,
+ FEATURE_PCLMUL,
+ FEATURE_AVX512VL,
+ FEATURE_AVX512BW,
+ FEATURE_AVX512DQ,
+ FEATURE_AVX512CD,
+ FEATURE_AVX512ER,
+ FEATURE_AVX512PF,
+ FEATURE_AVX512VBMI,
+ FEATURE_AVX512IFMA,
+ FEATURE_AVX5124VNNIW,
+ FEATURE_AVX5124FMAPS,
+ FEATURE_AVX512VPOPCNTDQ
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
-static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
+ return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
+ return false;
#else
- assert(0 && "This method is defined only for x86.");
+ return true;
#endif
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
+ return false;
#else
- assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+ return true;
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
-static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_X64)
#if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
-#else
- assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
-#endif
-#elif defined(__i386__) || defined(_M_IX86)
-#if defined(__GNUC__) || defined(__clang__)
+ return false;
+#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
- __asm {
- mov eax,value
- mov ecx,subleaf
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
+ return false;
#else
- assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+ return true;
#endif
+#elif defined(_MSC_VER)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
#else
- assert(0 && "This method is defined only for x86.");
+ return true;
#endif
}
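/* Editor's sketch (illustrative use, not part of the patch): the void ->
 * bool change lets callers fold "cpuid unavailable" and "leaf out of range"
 * into one test, as __cpu_indicator_init does further down:
 *
 *   unsigned EAX, EBX, ECX, EDX;
 *   if (getX86CpuIDAndInfo(0, &EAX, &EBX, &ECX, &EDX) || EAX < 1)
 *     return -1;  // cannot identify the CPU on this host
 */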
}
}
-static void getIntelProcessorTypeAndSubtype(unsigned int Family,
- unsigned int Model,
- unsigned int Brand_id,
- unsigned int Features,
- unsigned *Type, unsigned *Subtype) {
+static void
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Brand_id, unsigned Features,
+ unsigned *Type, unsigned *Subtype) {
if (Brand_id != 0)
return;
switch (Family) {
- case 3:
- *Type = INTEL_i386;
- break;
- case 4:
- switch (Model) {
- case 0: // Intel486 DX processors
- case 1: // Intel486 DX processors
- case 2: // Intel486 SX processors
- case 3: // Intel487 processors, IntelDX2 OverDrive processors,
- // IntelDX2 processors
- case 4: // Intel486 SL processor
- case 5: // IntelSX2 processors
- case 7: // Write-Back Enhanced IntelDX2 processors
- case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
- default:
- *Type = INTEL_i486;
- break;
- }
- case 5:
- switch (Model) {
- case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
- // Pentium processors (60, 66)
- case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
- // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
- // 150, 166, 200)
- case 3: // Pentium OverDrive processors for Intel486 processor-based
- // systems
- *Type = INTEL_PENTIUM;
- break;
- case 4: // Pentium OverDrive processor with MMX technology for Pentium
- // processor (75, 90, 100, 120, 133), Pentium processor with
- // MMX technology (166, 200)
- *Type = INTEL_PENTIUM;
- *Subtype = INTEL_PENTIUM_MMX;
- break;
- default:
- *Type = INTEL_PENTIUM;
- break;
- }
case 6:
switch (Model) {
- case 0x01: // Pentium Pro processor
- *Type = INTEL_PENTIUM_PRO;
- break;
- case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
- // model 03
- case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
- // model 05, and Intel Celeron processor, model 05
- case 0x06: // Celeron processor, model 06
- *Type = INTEL_PENTIUM_II;
- break;
- case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
- // processor, model 07
- case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
- // model 08, and Celeron processor, model 08
- case 0x0a: // Pentium III Xeon processor, model 0Ah
- case 0x0b: // Pentium III processor, model 0Bh
- *Type = INTEL_PENTIUM_III;
- break;
- case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
- case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
- // 0Dh. All processors are manufactured using the 90 nm process.
- case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
- // Integrated Processor with Intel QuickAssist Technology
- *Type = INTEL_PENTIUM_M;
- break;
- case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
- // 0Eh. All processors are manufactured using the 65 nm process.
- *Type = INTEL_CORE_DUO;
- break; // yonah
case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
// processor, Intel Core 2 Quad processor, Intel Core 2 Quad
// mobile processor, Intel Core 2 Extreme processor, Intel
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
- *Type = INTEL_CORE2; // "core2"
- *Subtype = INTEL_CORE2_65;
- break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
*Type = INTEL_CORE2; // "penryn"
- *Subtype = INTEL_CORE2_45;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 0x1f:
- case 0x2e: // Nehalem EX
+ case 0x2e: // Nehalem EX
*Type = INTEL_COREI7; // "nehalem"
*Subtype = INTEL_COREI7_NEHALEM;
break;
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
- case 0x3e: // Ivy Bridge EP
+ case 0x3e: // Ivy Bridge EP
*Type = INTEL_COREI7; // "ivybridge"
*Subtype = INTEL_COREI7_IVYBRIDGE;
break;
break;
// Skylake:
- case 0x4e:
- *Type = INTEL_COREI7; // "skylake-avx512"
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
- break;
- case 0x5e:
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
*Type = INTEL_COREI7; // "skylake"
*Subtype = INTEL_COREI7_SKYLAKE;
break;
+ // Skylake Xeon:
+ case 0x55:
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_BONNELL;
+ *Type = INTEL_BONNELL;
break; // "bonnell"
// Atom Silvermont codes from the Intel software optimization guide.
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_SILVERMONT;
+ *Type = INTEL_SILVERMONT;
break; // "silvermont"
case 0x57:
- *Type = INTEL_XEONPHI; // knl
- *Subtype = INTEL_KNIGHTS_LANDING;
+ *Type = INTEL_KNL; // knl
break;
- default: // Unknown family 6 CPU, try to guess.
- if (Features & (1 << FEATURE_AVX512)) {
- *Type = INTEL_XEONPHI; // knl
- *Subtype = INTEL_KNIGHTS_LANDING;
- break;
- }
- if (Features & (1 << FEATURE_ADX)) {
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_BROADWELL;
- break;
- }
- if (Features & (1 << FEATURE_AVX2)) {
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_HASWELL;
- break;
- }
- if (Features & (1 << FEATURE_AVX)) {
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_SANDYBRIDGE;
- break;
- }
- if (Features & (1 << FEATURE_SSE4_2)) {
- if (Features & (1 << FEATURE_MOVBE)) {
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_SILVERMONT;
- } else {
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_NEHALEM;
- }
- break;
- }
- if (Features & (1 << FEATURE_SSE4_1)) {
- *Type = INTEL_CORE2; // "penryn"
- *Subtype = INTEL_CORE2_45;
- break;
- }
- if (Features & (1 << FEATURE_SSSE3)) {
- if (Features & (1 << FEATURE_MOVBE)) {
- *Type = INTEL_ATOM;
- *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
- } else {
- *Type = INTEL_CORE2; // "core2"
- *Subtype = INTEL_CORE2_65;
- }
- break;
- }
- if (Features & (1 << FEATURE_EM64T)) {
- *Type = INTEL_X86_64;
- break; // x86-64
- }
- if (Features & (1 << FEATURE_SSE2)) {
- *Type = INTEL_PENTIUM_M;
- break;
- }
- if (Features & (1 << FEATURE_SSE)) {
- *Type = INTEL_PENTIUM_III;
- break;
- }
- if (Features & (1 << FEATURE_MMX)) {
- *Type = INTEL_PENTIUM_II;
- break;
- }
- *Type = INTEL_PENTIUM_PRO;
- break;
- }
- case 15: {
- switch (Model) {
- case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
- // model 00h and manufactured using the 0.18 micron process.
- case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
- // processor MP, and Intel Celeron processor. All processors are
- // model 01h and manufactured using the 0.18 micron process.
- case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
- // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
- // processor, and Mobile Intel Celeron processor. All processors
- // are model 02h and manufactured using the 0.13 micron process.
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
- break;
-
- case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
- // processor. All processors are model 03h and manufactured using
- // the 90 nm process.
- case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
- // Pentium D processor, Intel Xeon processor, Intel Xeon
- // processor MP, Intel Celeron D processor. All processors are
- // model 04h and manufactured using the 90 nm process.
- case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
- // Extreme Edition, Intel Xeon processor, Intel Xeon processor
- // MP, Intel Celeron D processor. All processors are model 06h
- // and manufactured using the 65 nm process.
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
- break;
-
- default:
- *Type =
- ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
+ default: // Unknown family 6 CPU.
break;
}
+ break;
- }
default:
- break; /*"generic"*/
+ break; // Unknown.
}
}
-static void getAMDProcessorTypeAndSubtype(unsigned int Family,
- unsigned int Model,
- unsigned int Features, unsigned *Type,
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ unsigned Features, unsigned *Type,
unsigned *Subtype) {
// FIXME: this poorly matches the generated SubtargetFeatureKV table. There
// appears to be no way to generate the wide variety of AMD-specific targets
// from the information returned from CPUID.
switch (Family) {
- case 4:
- *Type = AMD_i486;
- case 5:
- *Type = AMDPENTIUM;
- switch (Model) {
- case 6:
- case 7:
- *Subtype = AMDPENTIUM_K6;
- break; // "k6"
- case 8:
- *Subtype = AMDPENTIUM_K62;
- break; // "k6-2"
- case 9:
- case 13:
- *Subtype = AMDPENTIUM_K63;
- break; // "k6-3"
- case 10:
- *Subtype = AMDPENTIUM_GEODE;
- break; // "geode"
- default:
- break;
- }
- case 6:
- *Type = AMDATHLON;
- switch (Model) {
- case 4:
- *Subtype = AMDATHLON_TBIRD;
- break; // "athlon-tbird"
- case 6:
- case 7:
- case 8:
- *Subtype = AMDATHLON_MP;
- break; // "athlon-mp"
- case 10:
- *Subtype = AMDATHLON_XP;
- break; // "athlon-xp"
- default:
- break;
- }
- case 15:
- *Type = AMDATHLON;
- if (Features & (1 << FEATURE_SSE3)) {
- *Subtype = AMDATHLON_K8SSE3;
- break; // "k8-sse3"
- }
- switch (Model) {
- case 1:
- *Subtype = AMDATHLON_OPTERON;
- break; // "opteron"
- case 5:
- *Subtype = AMDATHLON_FX;
- break; // "athlon-fx"; also opteron
- default:
- *Subtype = AMDATHLON_64;
- break; // "athlon64"
- }
case 16:
*Type = AMDFAM10H; // "amdfam10"
switch (Model) {
case 8:
*Subtype = AMDFAM10H_ISTANBUL;
break;
- default:
- break;
}
+ break;
case 20:
- *Type = AMDFAM14H;
- *Subtype = AMD_BTVER1;
+ *Type = AMD_BTVER1;
break; // "btver1";
case 21:
*Type = AMDFAM15H;
- if (!(Features &
- (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
- *Subtype = AMD_BTVER1;
- break; // "btver1"
- }
- if (Model >= 0x50 && Model <= 0x6f) {
+ if (Model >= 0x60 && Model <= 0x7f) {
*Subtype = AMDFAM15H_BDVER4;
- break; // "bdver4"; 50h-6Fh: Excavator
+ break; // "bdver4"; 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
*Subtype = AMDFAM15H_BDVER3;
}
break;
case 22:
- *Type = AMDFAM16H;
- if (!(Features &
- (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
- *Subtype = AMD_BTVER1;
- break; // "btver1";
- }
- *Subtype = AMD_BTVER2;
+ *Type = AMD_BTVER2;
break; // "btver2"
+ case 23:
+ *Type = AMDFAM17H;
+ *Subtype = AMDFAM17H_ZNVER1;
+ break;
default:
break; // "generic"
}
}
-static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
- unsigned MaxLeaf) {
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+ unsigned *FeaturesOut) {
unsigned Features = 0;
- unsigned int EAX, EBX;
- Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
- Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
- Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
- Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
- Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
- Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
- Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
- Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+ unsigned EAX, EBX;
+
+ if ((EDX >> 15) & 1)
+ Features |= 1 << FEATURE_CMOV;
+ if ((EDX >> 23) & 1)
+ Features |= 1 << FEATURE_MMX;
+ if ((EDX >> 25) & 1)
+ Features |= 1 << FEATURE_SSE;
+ if ((EDX >> 26) & 1)
+ Features |= 1 << FEATURE_SSE2;
+
+ if ((ECX >> 0) & 1)
+ Features |= 1 << FEATURE_SSE3;
+ if ((ECX >> 1) & 1)
+ Features |= 1 << FEATURE_PCLMUL;
+ if ((ECX >> 9) & 1)
+ Features |= 1 << FEATURE_SSSE3;
+ if ((ECX >> 12) & 1)
+ Features |= 1 << FEATURE_FMA;
+ if ((ECX >> 19) & 1)
+ Features |= 1 << FEATURE_SSE4_1;
+ if ((ECX >> 20) & 1)
+ Features |= 1 << FEATURE_SSE4_2;
+ if ((ECX >> 23) & 1)
+ Features |= 1 << FEATURE_POPCNT;
+ if ((ECX >> 25) & 1)
+ Features |= 1 << FEATURE_AES;
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
- bool HasLeaf7 = MaxLeaf >= 0x7;
- getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
- bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
- bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
- bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
- Features |= (HasAVX << FEATURE_AVX);
- Features |= (HasAVX2 << FEATURE_AVX2);
- Features |= (HasAVX512 << FEATURE_AVX512);
- Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
- Features |= (HasADX << FEATURE_ADX);
-
- getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
- return Features;
+
+ if (HasAVX)
+ Features |= 1 << FEATURE_AVX;
+
+ bool HasLeaf7 =
+ MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+ if (HasLeaf7 && ((EBX >> 3) & 1))
+ Features |= 1 << FEATURE_BMI;
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ Features |= 1 << FEATURE_AVX2;
+ if (HasLeaf7 && ((EBX >> 9) & 1))
+ Features |= 1 << FEATURE_BMI2;
+ if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512F;
+ if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512DQ;
+ if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512IFMA;
+ if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512PF;
+ if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512ER;
+ if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512CD;
+ if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512BW;
+ if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VL;
+
+ if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VBMI;
+ if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
+
+ if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX5124VNNIW;
+ if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+ Features |= 1 << FEATURE_AVX5124FMAPS;
+
+ unsigned MaxExtLevel;
+ getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+ !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf1 && ((ECX >> 6) & 1))
+ Features |= 1 << FEATURE_SSE4_A;
+ if (HasExtLeaf1 && ((ECX >> 11) & 1))
+ Features |= 1 << FEATURE_XOP;
+ if (HasExtLeaf1 && ((ECX >> 16) & 1))
+ Features |= 1 << FEATURE_FMA4;
+
+ *FeaturesOut = Features;
}
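/* Editor's sketch (illustrative check, not part of the patch): features are
 * still packed one bit per FEATURE_* enumerator, so after
 * __cpu_indicator_init() has run, a caller tests them as before:
 *
 *   if (__cpu_model.__cpu_features[0] & (1U << FEATURE_AVX2))
 *     // safe to dispatch to an AVX2 code path
 */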
-#ifdef HAVE_INIT_PRIORITY
-#define CONSTRUCTOR_PRIORITY (101)
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
-#define CONSTRUCTOR_PRIORITY
+// FIXME: For MSVC, we should place a global function pointer in .CRT$X?? so
+// that this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
#endif
-int __cpu_indicator_init(void)
- __attribute__((constructor CONSTRUCTOR_PRIORITY));
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
struct __processor_model {
unsigned int __cpu_vendor;
the priority set. However, it still runs after ifunc initializers and
needs to be called explicitly there. */
-int __attribute__((constructor CONSTRUCTOR_PRIORITY))
+int CONSTRUCTOR_ATTRIBUTE
__cpu_indicator_init(void) {
- unsigned int EAX, EBX, ECX, EDX;
- unsigned int MaxLeaf = 5;
- unsigned int Vendor;
- unsigned int Model, Family, Brand_id;
- unsigned int Features = 0;
+ unsigned EAX, EBX, ECX, EDX;
+ unsigned MaxLeaf = 5;
+ unsigned Vendor;
+ unsigned Model, Family, Brand_id;
+ unsigned Features = 0;
/* This function needs to run just once. */
if (__cpu_model.__cpu_vendor)
return -1;
/* Assume cpuid insn present. Run in level 0 to get vendor id. */
- getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
-
- if (MaxLeaf < 1) {
+ if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
__cpu_model.__cpu_vendor = VENDOR_OTHER;
return -1;
}
Brand_id = EBX & 0xff;
/* Find available features. */
- Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+ getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
__cpu_model.__cpu_features[0] = Features;
if (Vendor == SIG_INTEL) {
#define DOUBLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(ddiv, divdf3)
-
COMPILER_RT_ABI fp_t
__divdf3(fp_t a, fp_t b) {
return result;
}
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) {
+ return __divdf3(a, b);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(fdiv, divsf3)
-
COMPILER_RT_ABI fp_t
__divsf3(fp_t a, fp_t b) {
return fromRep(absResult | quotientSign);
}
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) {
+ return __divsf3(a, b);
+}
+#endif
+
/* Returns: a / b */
-ARM_EABI_FNALIAS(idiv, divsi3)
-
COMPILER_RT_ABI si_int
__divsi3(si_int a, si_int b)
{
*/
return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) {
+ return __divsi3(a, b);
+}
+#endif
+
/* Returns: the quotient of (a + ib) / (c + id) */
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
__divtc3(long double __a, long double __b, long double __c, long double __d)
{
int __ilogbw = 0;
__d = crt_scalbnl(__d, -__ilogbw);
}
long double __denom = __c * __c + __d * __d;
- long double _Complex z;
- __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
- __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
- if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+ Lcomplex z;
+ COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+ COMPLEX_IMAGINARY(z) =
+ crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+ if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
{
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
{
- __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
- __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+ COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+ COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
}
else if ((crt_isinf(__a) || crt_isinf(__b)) &&
crt_isfinite(__c) && crt_isfinite(__d))
{
__a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
__b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
- __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
- __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+ COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
}
else if (crt_isinf(__logbw) && __logbw > 0.0 &&
crt_isfinite(__a) && crt_isfinite(__b))
{
__c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
__d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
- __real__ z = 0.0 * (__a * __c + __b * __d);
- __imag__ z = 0.0 * (__b * __c - __a * __d);
+ COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+ COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
}
}
return z;
*
* ===----------------------------------------------------------------------===
*/
-#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "int_lib.h"
#include "int_util.h"
+typedef struct emutls_address_array {
+ uintptr_t size; /* number of elements in the 'data' array */
+ void* data[];
+} emutls_address_array;
+
+static void emutls_shutdown(emutls_address_array *array);
+
+#ifndef _WIN32
+
+#include <pthread.h>
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t emutls_pthread_key;
+
+typedef unsigned int gcc_word __attribute__((mode(word)));
+typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+
/* Default is not to use posix_memalign, so systems like Android
* can use thread local data without heavier POSIX memory allocators.
*/
#define EMUTLS_USE_POSIX_MEMALIGN 0
#endif
-/* For every TLS variable xyz,
- * there is one __emutls_control variable named __emutls_v.xyz.
- * If xyz has non-zero initial value, __emutls_v.xyz's "value"
- * will point to __emutls_t.xyz, which has the initial value.
- */
-typedef unsigned int gcc_word __attribute__((mode(word)));
-typedef struct __emutls_control {
- /* Must use gcc_word here, instead of size_t, to match GCC. When
- gcc_word is larger than size_t, the upper extra bits are all
- zeros. We can use variables of size_t to operate on size and
- align. */
- gcc_word size; /* size of the object in bytes */
- gcc_word align; /* alignment of the object in bytes */
- union {
- uintptr_t index; /* data[index-1] is the object address */
- void* address; /* object address, when in single thread env */
- } object;
- void* value; /* null or non-zero initial value for the object */
-} __emutls_control;
-
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
void *base;
#if EMUTLS_USE_POSIX_MEMALIGN
#else
#define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
char* object;
- if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+ if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
abort();
base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
& ~(uintptr_t)(align - 1));
#endif
}
+static void emutls_key_destructor(void* ptr) {
+ emutls_shutdown((emutls_address_array*)ptr);
+ free(ptr);
+}
+
+static __inline void emutls_init(void) {
+ if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+ abort();
+}
+
+static __inline void emutls_init_once(void) {
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ pthread_once(&once, emutls_init);
+}
+
+static __inline void emutls_lock() {
+ pthread_mutex_lock(&emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+ pthread_mutex_unlock(&emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+ pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+ return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
+#else
+
+#include <windows.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <assert.h>
+#include <immintrin.h>
+
+static LPCRITICAL_SECTION emutls_mutex;
+static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
+
+typedef uintptr_t gcc_word;
+typedef void * gcc_pointer;
+
+static void win_error(DWORD last_err, const char *hint) {
+ char *buffer = NULL;
+ if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
+ fprintf(stderr, "Windows error: %s\n", buffer);
+ } else {
+ fprintf(stderr, "Unkown Windows error: %s\n", hint);
+ }
+ LocalFree(buffer);
+}
+
+static __inline void win_abort(DWORD last_err, const char *hint) {
+ win_error(last_err, hint);
+ abort();
+}
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+ void *base = _aligned_malloc(size, align);
+ if (!base)
+ win_abort(GetLastError(), "_aligned_malloc");
+ return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+ _aligned_free(base);
+}
+
+static void emutls_exit(void) {
+ if (emutls_mutex) {
+ DeleteCriticalSection(emutls_mutex);
+ _aligned_free(emutls_mutex);
+ emutls_mutex = NULL;
+ }
+ if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
+ emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index));
+ TlsFree(emutls_tls_index);
+ emutls_tls_index = TLS_OUT_OF_INDEXES;
+ }
+}
+
+#pragma warning (push)
+#pragma warning (disable : 4100)
+static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
+ emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
+ if (!emutls_mutex) {
+ win_error(GetLastError(), "_aligned_malloc");
+ return FALSE;
+ }
+ InitializeCriticalSection(emutls_mutex);
+
+ emutls_tls_index = TlsAlloc();
+ if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
+ emutls_exit();
+ win_error(GetLastError(), "TlsAlloc");
+ return FALSE;
+ }
+ atexit(&emutls_exit);
+ return TRUE;
+}
+
+static __inline void emutls_init_once(void) {
+ static INIT_ONCE once;
+ InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
+}
+
+static __inline void emutls_lock() {
+ EnterCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+ LeaveCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+ if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0)
+ win_abort(GetLastError(), "TlsSetValue");
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+ LPVOID value = TlsGetValue(emutls_tls_index);
+ if (value == NULL) {
+ const DWORD err = GetLastError();
+ if (err != ERROR_SUCCESS)
+ win_abort(err, "TlsGetValue");
+ }
+ return (emutls_address_array*) value;
+}
+
+/* Provide atomic load/store functions for emutls_get_index if built with MSVC.
+ */
+#if !defined(__ATOMIC_RELEASE)
+
+enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
+
+static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
+ assert(type == __ATOMIC_ACQUIRE);
+#ifdef _WIN64
+ return (uintptr_t) _load_be_u64(ptr);
+#else
+ return (uintptr_t) _load_be_u32(ptr);
+#endif
+}
+
+static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
+ assert(type == __ATOMIC_RELEASE);
+#ifdef _WIN64
+ _store_be_u64(ptr, val);
+#else
+ _store_be_u32(ptr, val);
+#endif
+}
+
+#endif
+
+#pragma warning (pop)
+
+#endif
+
+static size_t emutls_num_object = 0; /* number of allocated TLS objects */
+
+/* Free the allocated TLS data
+ */
+static void emutls_shutdown(emutls_address_array *array) {
+ if (array) {
+ uintptr_t i;
+ for (i = 0; i < array->size; ++i) {
+ if (array->data[i])
+ emutls_memalign_free(array->data[i]);
+ }
+ }
+}
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+ /* Must use gcc_word here, instead of size_t, to match GCC. When
+ gcc_word is larger than size_t, the upper extra bits are all
+ zeros. We can use variables of size_t to operate on size and
+ align. */
+ gcc_word size; /* size of the object in bytes */
+ gcc_word align; /* alignment of the object in bytes */
+ union {
+ uintptr_t index; /* data[index-1] is the object address */
+ void* address; /* object address, when in single thread env */
+ } object;
+ void* value; /* null or non-zero initial value for the object */
+} __emutls_control;
+
/* Emulated TLS objects are always allocated at run-time. */
static __inline void *emutls_allocate_object(__emutls_control *control) {
/* Use standard C types, check with gcc's emutls.o. */
- typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
return base;
}
-static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static size_t emutls_num_object = 0; /* number of allocated TLS objects */
-
-typedef struct emutls_address_array {
- uintptr_t size; /* number of elements in the 'data' array */
- void* data[];
-} emutls_address_array;
-
-static pthread_key_t emutls_pthread_key;
-
-static void emutls_key_destructor(void* ptr) {
- emutls_address_array* array = (emutls_address_array*)ptr;
- uintptr_t i;
- for (i = 0; i < array->size; ++i) {
- if (array->data[i])
- emutls_memalign_free(array->data[i]);
- }
- free(ptr);
-}
-
-static void emutls_init(void) {
- if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
- abort();
-}
/* Returns control->object.index; set index if not allocated yet. */
static __inline uintptr_t emutls_get_index(__emutls_control *control) {
uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
if (!index) {
- static pthread_once_t once = PTHREAD_ONCE_INIT;
- pthread_once(&once, emutls_init);
- pthread_mutex_lock(&emutls_mutex);
+ emutls_init_once();
+ emutls_lock();
index = control->object.index;
if (!index) {
index = ++emutls_num_object;
__atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
}
- pthread_mutex_unlock(&emutls_mutex);
+ emutls_unlock();
}
return index;
}
if (array == NULL)
abort();
array->size = size;
- pthread_setspecific(emutls_pthread_key, (void*)array);
+ emutls_setspecific(array);
}
/* Returns the new 'data' array size, number of elements,
return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
}
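/* Editor's worked example (values follow from the expression above): the
 * returned count is a multiple of 16 minus 1, so the count plus the 'size'
 * header field always fills whole 16-slot blocks:
 *   index  1: ((1 + 1 + 15)  & ~15) - 1 = 15
 *   index 15: ((15 + 1 + 15) & ~15) - 1 = 15
 *   index 16: ((16 + 1 + 15) & ~15) - 1 = 31
 */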
+/* Returns the size in bytes required for an emutls_address_array with
+ * N elements in its 'data' field.
+ */
+static __inline uintptr_t emutls_asize(uintptr_t N) {
+ return N * sizeof(void *) + sizeof(emutls_address_array);
+}
+
/* Returns the thread local emutls_address_array.
* Extends its size if necessary to hold address at index.
*/
static __inline emutls_address_array *
emutls_get_address_array(uintptr_t index) {
- emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+ emutls_address_array* array = emutls_getspecific();
if (array == NULL) {
uintptr_t new_size = emutls_new_data_array_size(index);
- array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array));
+ array = (emutls_address_array*) malloc(emutls_asize(new_size));
if (array)
memset(array->data, 0, new_size * sizeof(void*));
emutls_check_array_set_size(array, new_size);
} else if (index > array->size) {
uintptr_t orig_size = array->size;
uintptr_t new_size = emutls_new_data_array_size(index);
- array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array));
+ array = (emutls_address_array*) realloc(array, emutls_asize(new_size));
if (array)
memset(array->data + orig_size, 0,
(new_size - orig_size) * sizeof(void*));
void* __emutls_get_address(__emutls_control* control) {
uintptr_t index = emutls_get_index(control);
- emutls_address_array* array = emutls_get_address_array(index);
- if (array->data[index - 1] == NULL)
- array->data[index - 1] = emutls_allocate_object(control);
- return array->data[index - 1];
+ emutls_address_array* array = emutls_get_address_array(index--);
+ if (array->data[index] == NULL)
+ array->data[index] = emutls_allocate_object(control);
+ return array->data[index];
}
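/* Editor's sketch (illustrative lowering, not part of the patch): for each
 * TLS variable the compiler emits a control block and rewrites accesses
 * into calls here, conceptually:
 *
 *   __thread int xyz = 42;                      // source
 *   // becomes, at the assembly level:
 *   //   __emutls_v.xyz = { sizeof(int), _Alignof(int), { 0 }, &__emutls_t.xyz }
 *   //   int *p = __emutls_get_address(&__emutls_v.xyz);
 */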
#define DST_SINGLE
#include "fp_extend_impl.inc"
-ARM_EABI_FNALIAS(h2f, extendhfsf2)
-
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
return __extendhfsf2(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_h2f(uint16_t a) {
+ return __extendhfsf2(a);
+}
+#endif
+
#define DST_DOUBLE
#include "fp_extend_impl.inc"
-ARM_EABI_FNALIAS(f2d, extendsfdf2)
-
COMPILER_RT_ABI double __extendsfdf2(float a) {
return __extendXfYf2__(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_f2d(float a) {
+ return __extendsfdf2(a);
+}
+#endif
+
--- /dev/null
+/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffssi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffssi2(si_int a)
+{
+ if (a == 0)
+ {
+ return 0;
+ }
+ return __builtin_ctz(a) + 1;
+}
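/* Editor's sketch (hypothetical harness, not part of the patch; si_int is
 * assumed to be plain int here): __ffssi2 backs __builtin_ffs(); note the
 * index is 1-based, with 0 reserved for "no bits set". */
#include <stdio.h>
extern int __ffssi2(int);
int main(void) {
  printf("%d\n", __ffssi2(0));    /* 0: no bit set   */
  printf("%d\n", __ffssi2(1));    /* 1: bit 0 is set */
  printf("%d\n", __ffssi2(0x20)); /* 6: bit 5 is set */
  return 0;
}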
#define DOUBLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(d2lz, fixdfdi)
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; can set the invalid
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_d2lz(fp_t a) {
+#else
+__aeabi_d2lz(double a) {
+#endif
+ return __fixdfdi(a);
+}
+#endif
+
typedef su_int fixuint_t;
#include "fp_fixint_impl.inc"
-ARM_EABI_FNALIAS(d2iz, fixdfsi)
-
COMPILER_RT_ABI si_int
__fixdfsi(fp_t a) {
return __fixint(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
+ return __fixdfsi(a);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(f2lz, fixsfdi)
-
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; can set the invalid
* flag as a side-effect of computation.
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int
+#if defined(__SOFT_FP__)
+__aeabi_f2lz(fp_t a) {
+#else
+__aeabi_f2lz(float a) {
+#endif
+ return __fixsfdi(a);
+}
+#endif
+
typedef su_int fixuint_t;
#include "fp_fixint_impl.inc"
-ARM_EABI_FNALIAS(f2iz, fixsfsi)
-
COMPILER_RT_ABI si_int
__fixsfsi(fp_t a) {
return __fixint(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
+ return __fixsfsi(a);
+}
+#endif
+
#define DOUBLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
-
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; can set the invalid
* flag as a side-effect of computation.
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_d2ulz(fp_t a) {
+#else
+__aeabi_d2ulz(double a) {
+#endif
+ return __fixunsdfdi(a);
+}
+#endif
+
typedef su_int fixuint_t;
#include "fp_fixuint_impl.inc"
-ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
-
COMPILER_RT_ABI su_int
__fixunsdfsi(fp_t a) {
return __fixuint(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) {
+ return __fixunsdfsi(a);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
-
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; can set the invalid
* flag as a side-effect of computation.
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI du_int
+#if defined(__SOFT_FP__)
+__aeabi_f2ulz(fp_t a) {
+#else
+__aeabi_f2ulz(float a) {
+#endif
+ return __fixunssfdi(a);
+}
+#endif
+
typedef su_int fixuint_t;
#include "fp_fixuint_impl.inc"
-ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
-
COMPILER_RT_ABI su_int
__fixunssfsi(fp_t a) {
return __fixuint(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) {
+ return __fixunssfsi(a);
+}
+#endif
+
/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
-ARM_EABI_FNALIAS(l2d, floatdidf)
-
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; we'll set the inexact flag
* as a side-effect of this computation.
return fb.f;
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_l2d(di_int a) {
+ return __floatdidf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(l2f, floatdisf)
-
COMPILER_RT_ABI float
__floatdisf(di_int a)
{
((su_int)a & 0x007FFFFF); /* mantissa */
return fb.f;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_l2f(di_int a) {
+ return __floatdisf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(i2d, floatsidf)
-
COMPILER_RT_ABI fp_t
__floatsidf(int a) {
// Insert the sign bit and return
return fromRep(result | sign);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2d(int a) {
+ return __floatsidf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(i2f, floatsisf)
-
COMPILER_RT_ABI fp_t
__floatsisf(int a) {
// Insert the sign bit and return
return fromRep(result | sign);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_i2f(int a) {
+ return __floatsisf(a);
+}
+#endif
+
--- /dev/null
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a ti_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is an IEEE 128-bit floating point type
+ * ti_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floattitf(ti_int a) {
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(ti_int) * CHAR_BIT;
+ const ti_int s = a >> (N-1);
+ a = (a ^ s) - s;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG) {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd) {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ } else {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+
+ long_double_bits fb;
+ fb.u.high.all = (s & 0x8000000000000000LL) /* sign */
+ | (du_int)(e + 16383) << 48 /* exponent */
+ | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */
+ fb.u.low.all = (du_int)(a);
+ return fb.f;
+}
+
+#endif
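/* Editor's sketch (hypothetical harness, not part of the patch; assumes a
 * target where long double is IEEE binary128 so CRT_LDBL_128BIT holds,
 * e.g. aarch64-linux): */
#ifdef __SIZEOF_INT128__
extern long double __floattitf(__int128);
long double hundred_bit_value(void) {
  __int128 x = ((__int128)1 << 100) + 1; /* 101 significant bits */
  /* 101 <= LDBL_MANT_DIG (113), so this takes the exact, non-rounding
     path and returns 2^100 + 1 precisely. */
  return __floattitf(x);
}
#endif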
#include "int_lib.h"
-ARM_EABI_FNALIAS(ul2d, floatundidf)
-
#ifndef __SOFT_FP__
/* Support for systems that have hardware floating-point; we'll set the inexact flag
* as a side-effect of this computation.
return fb.f;
}
#endif
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI double __aeabi_ul2d(du_int a) {
+ return __floatundidf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(ul2f, floatundisf)
-
COMPILER_RT_ABI float
__floatundisf(du_int a)
{
((su_int)a & 0x007FFFFF); /* mantissa */
return fb.f;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_ul2f(du_int a) {
+ return __floatundisf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(ui2d, floatunsidf)
-
COMPILER_RT_ABI fp_t
__floatunsidf(unsigned int a) {
result += (rep_t)(exponent + exponentBias) << significandBits;
return fromRep(result);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) {
+ return __floatunsidf(a);
+}
+#endif
+
#include "int_lib.h"
-ARM_EABI_FNALIAS(ui2f, floatunsisf)
-
COMPILER_RT_ABI fp_t
__floatunsisf(unsigned int a) {
result += (rep_t)(exponent + exponentBias) << significandBits;
return fromRep(result);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) {
+ return __floatunsisf(a);
+}
+#endif
+
--- /dev/null
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+/* Returns: convert a tu_int to a fp_t, rounding toward even. */
+
+/* Assumption: fp_t is an IEEE 128-bit floating point type
+ * tu_int is a 128 bit integral type
+ */
+
+/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm |
+ * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+ */
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t
+__floatuntitf(tu_int a) {
+ if (a == 0)
+ return 0.0;
+ const unsigned N = sizeof(tu_int) * CHAR_BIT;
+ int sd = N - __clzti2(a); /* number of significant digits */
+ int e = sd - 1; /* exponent */
+ if (sd > LDBL_MANT_DIG) {
+ /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ * 12345678901234567890123456
+ * 1 = msb 1 bit
+ * P = bit LDBL_MANT_DIG-1 bits to the right of 1
+ * Q = bit LDBL_MANT_DIG bits to the right of 1
+ * R = "or" of all bits to the right of Q
+ */
+ switch (sd) {
+ case LDBL_MANT_DIG + 1:
+ a <<= 1;
+ break;
+ case LDBL_MANT_DIG + 2:
+ break;
+ default:
+ a = (a >> (sd - (LDBL_MANT_DIG+2))) |
+ ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0);
+ };
+ /* finish: */
+ a |= (a & 4) != 0; /* Or P into R */
+ ++a; /* round - this step may add a significant bit */
+ a >>= 2; /* dump Q and R */
+ /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */
+ if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+ a >>= 1;
+ ++e;
+ }
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ } else {
+ a <<= (LDBL_MANT_DIG - sd);
+ /* a is now rounded to LDBL_MANT_DIG bits */
+ }
+
+ long_double_bits fb;
+ fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */
+ | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */
+ fb.u.low.all = (du_int)(a);
+ return fb.f;
+}
+
+#endif
+++ /dev/null
-#===- lib/builtins/i386/Makefile.mk ------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := i386
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
/* ABI macro definitions */
#if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name) \
- void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+# ifdef COMPILER_RT_ARMHF_TARGET
+# define COMPILER_RT_ABI
+# else
+# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+# endif
#else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
# define COMPILER_RT_ABI
#endif
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
#ifdef _MSC_VER
#define ALWAYS_INLINE __forceinline
#define NOINLINE __declspec(noinline)
#include <intrin.h>
uint32_t __inline __builtin_ctz(uint32_t value) {
- uint32_t trailing_zero = 0;
+ unsigned long trailing_zero = 0;
if (_BitScanForward(&trailing_zero, value))
return trailing_zero;
return 32;
}
uint32_t __inline __builtin_clz(uint32_t value) {
- uint32_t leading_zero = 0;
+ unsigned long leading_zero = 0;
if (_BitScanReverse(&leading_zero, value))
return 31 - leading_zero;
return 32;
#if defined(_M_ARM) || defined(_M_X64)
uint32_t __inline __builtin_clzll(uint64_t value) {
- uint32_t leading_zero = 0;
+ unsigned long leading_zero = 0;
if (_BitScanReverse64(&leading_zero, value))
return 63 - leading_zero;
return 64;
}s;
} udwords;
-/* MIPS64 issue: PR 20098 */
-#if (defined(__LP64__) || defined(__wasm__)) && \
- !(defined(__mips__) && defined(__clang__))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
#define CRT_HAS_128BIT
#endif
__assert_rtn(function, file, line, "libcompiler_rt abort");
}
+#elif __Fuchsia__
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+ __builtin_trap();
+}
+
#else
/* Get the system definition of abort() */
/* Precondition: 0 <= b < bits_in_dword */
-ARM_EABI_FNALIAS(llsr, lshrdi3)
-
COMPILER_RT_ABI di_int
__lshrdi3(di_int a, si_int b)
{
}
return result.all;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) {
+ return __lshrdi3(a, b);
+}
+#endif
+
--- /dev/null
+/* ===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI di_int __fixdfdi(double a);
+COMPILER_RT_ABI di_int __fixsfdi(float a);
+COMPILER_RT_ABI du_int __fixunsdfdi(double a);
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+COMPILER_RT_ABI double __floatdidf(di_int a);
+COMPILER_RT_ABI float __floatdisf(di_int a);
+COMPILER_RT_ABI double __floatundidf(du_int a);
+COMPILER_RT_ABI float __floatundisf(du_int a);
+
+COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
+
+COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
+
+COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
+
+COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
+
+COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
+
+COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
+
+COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
+
+COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
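/* Editor's note: the wrappers above forward the __dtoi64/__stou64/__i64tod
 * family -- the helper names Windows-on-ARM code generation emits for
 * 64-bit int/float conversions -- to the corresponding compiler-rt
 * builtins, so MinGW ARM links do not need the Microsoft CRT helpers. */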
#define DOUBLE_PRECISION
#include "fp_mul_impl.inc"
-ARM_EABI_FNALIAS(dmul, muldf3)
-
COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
return __mulXf3__(a, b);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) {
+ return __muldf3(a, b);
+}
+#endif
+
/* Returns: a * b */
-ARM_EABI_FNALIAS(lmul, muldi3)
-
COMPILER_RT_ABI di_int
__muldi3(di_int a, di_int b)
{
r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
return r.all;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) {
+ return __muldi3(a, b);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_mul_impl.inc"
-ARM_EABI_FNALIAS(fmul, mulsf3)
-
COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
return __mulXf3__(a, b);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) {
+ return __mulsf3(a, b);
+}
+#endif
+
#define DOUBLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(dneg, negdf2)
-
COMPILER_RT_ABI fp_t
__negdf2(fp_t a) {
return fromRep(toRep(a) ^ signBit);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) {
+ return __negdf2(a);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(fneg, negsf2)
-
COMPILER_RT_ABI fp_t
__negsf2(fp_t a) {
return fromRep(toRep(a) ^ signBit);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) {
+ return __negsf2(a);
+}
+#endif
+
+++ /dev/null
-#===- lib/builtins/ppc/Makefile.mk -------------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := ppc
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
#define DOUBLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(dsub, subdf3)
-
// Subtraction; flip the sign bit of b and add.
COMPILER_RT_ABI fp_t
__subdf3(fp_t a, fp_t b) {
return __adddf3(a, fromRep(toRep(b) ^ signBit));
}
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) {
+ return __subdf3(a, b);
+}
+#endif
+
#define SINGLE_PRECISION
#include "fp_lib.h"
-ARM_EABI_FNALIAS(fsub, subsf3)
-
// Subtraction; flip the sign bit of b and add.
COMPILER_RT_ABI fp_t
__subsf3(fp_t a, fp_t b) {
return __addsf3(a, fromRep(toRep(b) ^ signBit));
}
+#if defined(__ARM_EABI__)
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) {
+ return __subsf3(a, b);
+}
+#endif
+
#define DST_HALF
#include "fp_trunc_impl.inc"
-ARM_EABI_FNALIAS(d2h, truncdfhf2)
-
COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
return __truncXfYf2__(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_d2h(double a) {
+ return __truncdfhf2(a);
+}
+#endif
+
#define DST_SINGLE
#include "fp_trunc_impl.inc"
-ARM_EABI_FNALIAS(d2f, truncdfsf2)
-
COMPILER_RT_ABI float __truncdfsf2(double a) {
return __truncXfYf2__(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI float __aeabi_d2f(double a) {
+ return __truncdfsf2(a);
+}
+#endif
+
#define DST_HALF
#include "fp_trunc_impl.inc"
-ARM_EABI_FNALIAS(f2h, truncsfhf2)
-
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
return __truncsfhf2(a);
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI uint16_t __aeabi_f2h(float a) {
+ return __truncsfhf2(a);
+}
+#endif
+
/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
-ARM_EABI_FNALIAS(uidiv, udivsi3)
-
/* This function should not call __divsi3! */
COMPILER_RT_ABI su_int
__udivsi3(su_int n, su_int d)
q = (q << 1) | carry;
return q;
}
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) {
+ return __udivsi3(n, d);
+}
+#endif
+
+++ /dev/null
-#===- lib/builtins/x86_64/Makefile.mk ----------------------*- Makefile -*--===#
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-ModuleName := builtins
-SubDirs :=
-OnlyArchs := x86_64 x86_64h
-
-AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
-Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
-ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
-Implementation := Optimized
-
-# FIXME: use automatic dependencies?
-Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
/* double __floatdidf(di_int a); */
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
#include "../int_lib.h"
* License. See LICENSE.TXT for details.
*/
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
#include "../int_lib.h"