Redo the calculation of the alignment and placement of static TLS data to
author guenther <guenther@openbsd.org>
Fri, 1 Dec 2017 23:30:05 +0000 (23:30 +0000)
committer guenther <guenther@openbsd.org>
Fri, 1 Dec 2017 23:30:05 +0000 (23:30 +0000)
correctly take into account the segment p_align.  Previously, anything
with a size below the natural alignment or with alignment larger than
the natural one would either not be initialized correctly, be misaligned,
or result in the TIB being misaligned.

Problems reported by Charles Collicutt (charles (at) collicutt.co.uk)
ok kettenis@

lib/libc/dlfcn/init.c
lib/libc/dlfcn/tib.c
lib/libc/hidden/tib.h
libexec/ld.so/malloc.c
libexec/ld.so/tib.c
libexec/ld.so/util.h

index c791c88..3c387a6 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: init.c,v 1.5 2016/09/06 18:49:34 guenther Exp $ */
+/*     $OpenBSD: init.c,v 1.6 2017/12/01 23:30:05 guenther Exp $ */
 /*
  * Copyright (c) 2014,2015 Philip Guenther <guenther@openbsd.org>
  *
 
 #include "init.h"
 
+#define MAX(a,b)       (((a)>(b))?(a):(b))
+
+#ifdef TIB_EXTRA_ALIGN
+# define TIB_ALIGN     MAX(__alignof__(struct tib), TIB_EXTRA_ALIGN)
+#else
+# define TIB_ALIGN     __alignof__(struct tib)
+#endif
+
 /* XXX should be in an include file shared with csu */
 char   ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
@@ -53,8 +61,10 @@ struct dl_phdr_info  _static_phdr_info = { .dlpi_name = "a.out" };
 
 static inline void early_static_init(char **_argv, char **_envp);
 static inline void setup_static_tib(Elf_Phdr *_phdr, int _phnum);
-#endif /* PIC */
 
+/* provided by the linker */
+extern Elf_Ehdr __executable_start[] __attribute__((weak));
+#endif /* PIC */
 
 /*
  * extract useful bits from the auxiliary vector and either
@@ -99,6 +109,15 @@ _csu_finish(char **argv, char **envp, void (*cleanup)(void))
        }
 
 #ifndef PIC
+       if (cleanup == NULL && phdr == NULL && __executable_start != NULL) {
+               /*
+                * Static non-PIE processes don't get an AUX vector,
+                * so find the phdrs through the ELF header
+                */
+               phdr = (void *)((char *)__executable_start +
+                   __executable_start->e_phoff);
+               phnum = __executable_start->e_phnum;
+       }
        /* static libc in a static link? */
        if (cleanup == NULL)
                setup_static_tib(phdr, phnum);
@@ -148,6 +167,8 @@ static void         *static_tls;
 static size_t          static_tls_fsize;
 
 size_t                 _static_tls_size = 0;
+int                    _static_tls_align;
+int                    _static_tls_align_offset;
 
 static inline void
 setup_static_tib(Elf_Phdr *phdr, int phnum)
@@ -156,6 +177,7 @@ setup_static_tib(Elf_Phdr *phdr, int phnum)
        char *base;
        int i;
 
+       _static_tls_align = TIB_ALIGN;
        if (phdr != NULL) {
                for (i = 0; i < phnum; i++) {
                        if (phdr[i].p_type != PT_TLS)
@@ -164,39 +186,53 @@ setup_static_tib(Elf_Phdr *phdr, int phnum)
                                break;
                        if (phdr[i].p_memsz < phdr[i].p_filesz)
                                break;          /* invalid */
+                       if (phdr[i].p_align > getpagesize())
+                               break;          /* nope */
+                       _static_tls_align = MAX(phdr[i].p_align, TIB_ALIGN);
 #if TLS_VARIANT == 1
+                       /*
+                        * Variant 1 places the data after the TIB.  If the
+                        * TLS alignment is larger than the TIB alignment
+                        * then we may need to pad in front of the TIB to
+                        * place the TLS data on the proper alignment.
+                        * Example: p_align=16 sizeof(TIB)=52 align(TIB)=4
+                        * - need to offset the TIB 12 bytes from the start
+                        * - to place the TLS data at offset 64
+                        */
                        _static_tls_size = phdr[i].p_memsz;
+                       _static_tls_align_offset =
+                           ELF_ROUND(sizeof(struct tib), _static_tls_align) -
+                           sizeof(struct tib);
 #elif TLS_VARIANT == 2
                        /*
-                        * variant 2 places the data before the TIB
-                        * so we need to round up to the alignment
+                        * Variant 2 places the data before the TIB
+                        * so we need to round up the size to the
+                        * TLS data alignment and the TIB's alignment.
+                        * Example A: p_memsz=24 p_align=16 align(TIB)=8
+                        * - need to allocate 32 bytes for TLS as compiler
+                        * - will give the first TLS symbol an offset of -32
+                        * Example B: p_memsz=4 p_align=4 align(TIB)=8
+                        * - need to allocate 8 bytes so that the TIB is
+                        * - properly aligned
                         */
                        _static_tls_size = ELF_ROUND(phdr[i].p_memsz,
                            phdr[i].p_align);
+                       _static_tls_align_offset = ELF_ROUND(_static_tls_size,
+                           _static_tls_align) - _static_tls_size;
 #endif
                        if (phdr[i].p_vaddr != 0 && phdr[i].p_filesz != 0) {
-                               static_tls = (void *)phdr[i].p_vaddr;
+                               static_tls = (void *)phdr[i].p_vaddr +
+                                   _static_phdr_info.dlpi_addr;
                                static_tls_fsize = phdr[i].p_filesz;
                        }
                        break;
                }
        }
 
-       /*
-        * We call getpagesize() here instead of using _pagesize because
-        * there's no aux-vector in non-PIE static links, so _pagesize
-        * might not be set yet.  If so getpagesize() will get the value.
-        */
-       base = mmap(NULL, ELF_ROUND(_static_tls_size + sizeof *tib,
-           getpagesize()), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-# if TLS_VARIANT == 1
-       tib = (struct tib *)base;
-# elif TLS_VARIANT == 2
-       tib = (struct tib *)(base + _static_tls_size);
-# endif
+       base = mmap(NULL, _static_tls_size + _static_tls_align_offset
+           + sizeof *tib, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
 
-       _static_tls_init(base);
-       TIB_INIT(tib, NULL, NULL);
+       tib = _static_tls_init(base, NULL);
        tib->tib_tid = getthrid();
        TCB_SET(TIB_TO_TCB(tib));
 #if ! TCB_HAVE_MD_GET
@@ -204,17 +240,27 @@ setup_static_tib(Elf_Phdr *phdr, int phnum)
 #endif
 }
 
-void
-_static_tls_init(char *base)
+struct tib *
+_static_tls_init(char *base, void *thread)
 {
+       struct tib *tib;
+
+       base += _static_tls_align_offset;
+# if TLS_VARIANT == 1
+       tib = (struct tib *)base;
+       base += sizeof(struct tib);
+# elif TLS_VARIANT == 2
+       tib = (struct tib *)(base + _static_tls_size);
+# endif
+
        if (_static_tls_size) {
-#if TLS_VARIANT == 1
-               base += sizeof(struct tib);
-#endif
                if (static_tls != NULL)
                        memcpy(base, static_tls, static_tls_fsize);
                memset(base + static_tls_fsize, 0,
                    _static_tls_size - static_tls_fsize);
        }
+
+       TIB_INIT(tib, NULL, thread);
+       return tib;
 }
 #endif /* !PIC */
index 4aba706..3b437c1 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tib.c,v 1.1 2016/05/07 19:05:22 guenther Exp $ */
+/*     $OpenBSD: tib.c,v 1.2 2017/12/01 23:30:05 guenther Exp $ */
 /*
  * Copyright (c) 2016 Philip Guenther <guenther@openbsd.org>
  *
@@ -18,7 +18,7 @@
 #include <tib.h>
 
 #ifndef PIC
-# include <stdlib.h>           /* malloc and free */
+# include <stdlib.h>           /* posix_memalign and free */
 #endif
 
 #define ELF_ROUND(x,malign)    (((x) + (malign)-1) & ~((malign)-1))
@@ -43,29 +43,31 @@ _dl_allocate_tib(size_t extra)
 #ifdef PIC
        return NULL;                    /* overriden by ld.so */
 #else
-       char *base;
+       void *base;
        char *thread;
-       struct tib *tib;
 
 # if TLS_VARIANT == 1
-       /* round up the extra size to align the tib after it */
-       extra = ELF_ROUND(extra, sizeof(void *));
-       base = malloc(extra + sizeof(struct tib) + _static_tls_size);
-       tib = (struct tib *)(base + extra);
+       /* round up the extra size to align the TIB and TLS data after it */
+       extra = (extra <= _static_tls_align_offset) ? 0 :
+           ELF_ROUND(extra - _static_tls_align_offset, _static_tls_align);
+       if (posix_memalign(&base, _static_tls_align, extra +
+           _static_tls_align_offset + sizeof(struct tib) +
+           _static_tls_size) != 0)
+               return NULL;
        thread = base;
+       base = (char *)base + extra;
 
 # elif TLS_VARIANT == 2
-       /* round up the tib size to align the extra area after it */
-       base = malloc(ELF_ROUND(sizeof(struct tib), TIB_EXTRA_ALIGN) +
-           extra + _static_tls_size);
-       tib = (struct tib *)(base + _static_tls_size);
-       thread = (char *)tib + ELF_ROUND(sizeof(struct tib), TIB_EXTRA_ALIGN);
+       /* round up the TIB size to align the extra area after it */
+       if (posix_memalign(&base, _static_tls_align,
+           _static_tls_align_offset + _static_tls_size +
+           ELF_ROUND(sizeof(struct tib), TIB_EXTRA_ALIGN) + extra) != 0)
+               return NULL;
+       thread = (char *)base + _static_tls_align_offset + _static_tls_size +
+           ELF_ROUND(sizeof(struct tib), TIB_EXTRA_ALIGN);
 # endif
 
-       _static_tls_init(base);
-       TIB_INIT(tib, NULL, thread);
-
-       return (tib);
+       return _static_tls_init(base, thread);
 #endif /* !PIC */
 }
 
@@ -76,10 +78,12 @@ _dl_free_tib(void *tib, size_t extra)
        size_t tib_offset;
 
 # if TLS_VARIANT == 1
-       tib_offset = ELF_ROUND(extra, sizeof(void *));
+       tib_offset = (extra <= _static_tls_align_offset) ? 0 :
+           ELF_ROUND(extra - _static_tls_align_offset, _static_tls_align);
 # elif TLS_VARIANT == 2
        tib_offset = _static_tls_size;
 # endif
+       tib_offset += _static_tls_align_offset;
 
        free((char *)tib - tib_offset);
 #endif /* !PIC */
index 49a562d..9c4be95 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tib.h,v 1.1 2016/05/07 19:05:22 guenther Exp $        */
+/*     $OpenBSD: tib.h,v 1.2 2017/12/01 23:30:05 guenther Exp $        */
 /*
  * Copyright (c) 2015 Philip Guenther <guenther@openbsd.org>
  *
 __BEGIN_HIDDEN_DECLS
 
 #ifndef PIC
-void   _static_tls_init(char *_base);
+/*
+ * Handling for static TLS allocation in statically linked programs
+ */
+/* Given the base of a TIB allocation, initialize the static TLS for a thread */
+struct tib *_static_tls_init(char *_base, void *_thread);
 
-/* size of static TLS allocation in staticly linked programs */
+/* size of static TLS allocation */
 extern size_t  _static_tls_size;
+
+/* alignment of static TLS allocation */
+extern int     _static_tls_align;
+
+/* base-offset alignment of static TLS allocation */
+extern int     _static_tls_align_offset;
 #endif
 
 #if ! TCB_HAVE_MD_GET
index d9032dc..fb93e37 100644 (file)
@@ -1105,3 +1105,122 @@ ret:
        return r;
 }
 
+static void *
+mapalign(struct dir_info *d, size_t alignment, size_t sz, int zero_fill)
+{
+       char *p, *q;
+
+       if (alignment < MALLOC_PAGESIZE || ((alignment - 1) & alignment) != 0)
+               wrterror("mapalign bad alignment");
+       if (sz != PAGEROUND(sz))
+               wrterror("mapalign round");
+
+       /* Allocate sz + alignment bytes of memory, which must include a
+        * subrange of sz bytes that is properly aligned.  Unmap the
+        * other bytes, and then return that subrange.
+        */
+
+       /* We need sz + alignment to fit into a size_t. */
+       if (alignment > SIZE_MAX - sz)
+               return MAP_FAILED;
+
+       p = map(d, sz + alignment, zero_fill);
+       if (p == MAP_FAILED)
+               return MAP_FAILED;
+       q = (char *)(((uintptr_t)p + alignment - 1) & ~(alignment - 1));
+       if (q != p) {
+               if (_dl_munmap(p, q - p))
+                       wrterror("munmap");
+       }
+       if (_dl_munmap(q + sz, alignment - (q - p)))
+               wrterror("munmap");
+
+       return q;
+}
+
+static void *
+omemalign(size_t alignment, size_t sz, int zero_fill)
+{
+       size_t psz;
+       void *p;
+
+       /* If between half a page and a page, avoid MALLOC_MOVE. */
+       if (sz > MALLOC_MAXCHUNK && sz < MALLOC_PAGESIZE)
+               sz = MALLOC_PAGESIZE;
+       if (alignment <= MALLOC_PAGESIZE) {
+               /*
+                * max(size, alignment) is enough to assure the requested
+                * alignment, since the allocator always allocates
+                * power-of-two blocks.
+                */
+               if (sz < alignment)
+                       sz = alignment;
+               return omalloc(sz, zero_fill);
+       }
+
+       if (sz >= SIZE_MAX - mopts.malloc_guard - MALLOC_PAGESIZE) {
+               return NULL;
+       }
+
+       sz += mopts.malloc_guard;
+       psz = PAGEROUND(sz);
+
+       p = mapalign(g_pool, alignment, psz, zero_fill);
+       if (p == MAP_FAILED) {
+               return NULL;
+       }
+
+       if (insert(g_pool, p, sz)) {
+               unmap(g_pool, p, psz);
+               return NULL;
+       }
+
+       if (mopts.malloc_guard) {
+               if (_dl_mprotect((char *)p + psz - mopts.malloc_guard,
+                   mopts.malloc_guard, PROT_NONE))
+                       wrterror("mprotect");
+       }
+
+       if (mopts.malloc_junk == 2) {
+               if (zero_fill)
+                       _dl_memset((char *)p + sz - mopts.malloc_guard,
+                           SOME_JUNK, psz - sz);
+               else
+                       _dl_memset(p, SOME_JUNK, psz - mopts.malloc_guard);
+       }
+       else if (mopts.chunk_canaries) {
+               size_t csz = psz - sz;
+
+               if (csz > CHUNK_CHECK_LENGTH)
+                       csz = CHUNK_CHECK_LENGTH;
+               _dl_memset((char *)p + sz - mopts.malloc_guard,
+                   SOME_JUNK, csz);
+       }
+
+       return p;
+}
+
+void *
+_dl_aligned_alloc(size_t alignment, size_t size)
+{
+       void *r = NULL;
+       lock_cb *cb;
+
+       /* Make sure that alignment is a large enough power of 2. */
+       if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *))
+               return NULL;
+
+       cb = _dl_thread_kern_stop();
+       if (g_pool == NULL)
+               omalloc_init(&g_pool);
+       g_pool->func = "aligned_alloc():";
+       if (g_pool->active++) {
+               malloc_recurse();
+               goto ret;
+       }
+       r = omemalign(alignment, size, 0);
+       g_pool->active--;
+ret:
+       _dl_thread_kern_go(cb);
+       return r;
+}
index 93f6a6c..41aafce 100644 (file)
 
 __dso_hidden void *allocate_tib(size_t);
 
+#define MAX(a,b)       (((a)>(b))?(a):(b))
+
+#ifdef TIB_EXTRA_ALIGN
+# define TIB_ALIGN     MAX(__alignof__(struct tib), TIB_EXTRA_ALIGN)
+#else
+# define TIB_ALIGN     __alignof__(struct tib)
+#endif
+
+
+/* size of static TLS allocation */
 static int     static_tls_size;
+/* alignment of static TLS allocation */
+static int     static_tls_align;
+/* base-offset alignment of (first) static TLS allocation */
+static int     static_tls_align_offset;
 
 int            _dl_tib_static_done;
 
@@ -55,22 +69,26 @@ allocate_tib(size_t extra)
        struct elf_object *obj;
 
 #if TLS_VARIANT == 1
-       /* round up the extra size to align the tib after it */
-       extra = ELF_ROUND(extra, sizeof(void *));
-       base = _dl_malloc(extra + sizeof *tib + static_tls_size);
+       /* round up the extra size to align the TIB and TLS data after it */
+       size_t unpad_extra = (extra <= static_tls_align_offset) ? 0 :
+           ELF_ROUND(extra - static_tls_align_offset, static_tls_align);
+       base = _dl_aligned_alloc(static_tls_align, unpad_extra +
+           static_tls_align_offset + sizeof *tib + static_tls_size);
        if (base == NULL)
                return NULL;
-       tib = (struct tib *)(base + extra);
+       tib = (struct tib *)(base + unpad_extra + static_tls_align_offset);
        if (extra)
                thread = base;
 #define TLS_ADDR(tibp, offset) ((char *)(tibp) + sizeof(struct tib) + (offset))
 
 #elif TLS_VARIANT == 2
-       /* round up the tib size to align the extra area after it */
-       base = _dl_malloc(ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN) +
-           extra + static_tls_size);
+       /* round up the TIB size to align the extra area after it */
+       base = _dl_aligned_alloc(static_tls_align, static_tls_size +
+           static_tls_align_offset + ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN) +
+           extra);
        if (base == NULL)
                return NULL;
+       base += static_tls_align_offset;
        tib = (struct tib *)(base + static_tls_size);
        if (extra)
                thread = (char *)tib + ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN);
@@ -107,10 +125,12 @@ _dl_free_tib(void *tib, size_t extra)
        size_t tib_offset;
 
 #if TLS_VARIANT == 1
-       tib_offset = ELF_ROUND(extra, sizeof(void *));
+       tib_offset = (extra <= static_tls_align_offset) ? 0 :
+           ELF_ROUND(extra - static_tls_align_offset, static_tls_align);
 #elif TLS_VARIANT == 2
        tib_offset = static_tls_size;
 #endif
+       tib_offset += static_tls_align_offset;
 
        DL_DEB(("free tib=%p\n", (void *)tib));
        _dl_free((char *)tib - tib_offset);
@@ -136,19 +156,68 @@ _dl_set_tls(elf_object_t *object, Elf_Phdr *ptls, Elf_Addr libaddr,
 }
 
 static inline Elf_Addr
-allocate_tls_offset(Elf_Addr msize, Elf_Addr align)
+allocate_tls_offset(Elf_Addr msize, Elf_Addr align, int for_exe)
 {
        Elf_Addr offset;
 
+       if (for_exe && static_tls_size != 0)
+               _dl_die("TLS allocation before executable!");
+
 #if TLS_VARIANT == 1
-       /* round up to the required alignment, then allocate the space */
-       offset = ELF_ROUND(static_tls_size, align);
-       static_tls_size += msize;
+       if (for_exe) {
+               /*
+                * Variant 1 places the data after the TIB.  If the
+                * TLS alignment is larger than the TIB alignment
+                * then we may need to pad in front of the TIB to
+                * place the TLS data on the proper alignment.
+                * Example: p_align=16 sizeof(TIB)=52 align(TIB)=4
+                * - need to offset the TIB 12 bytes from the start
+                * - to place the TLS data at offset 64
+                */
+               static_tls_align = MAX(align, TIB_ALIGN);
+               static_tls_align_offset =
+                   ELF_ROUND(sizeof(struct tib), static_tls_align) -
+                   sizeof(struct tib);
+               offset = 0;
+               static_tls_size = msize;
+       } else {
+               /*
+                * If a later object increases the alignment, realign the
+                * existing sections.  We push as much padding as possible
+                * to the start where it can overlap the thread structure
+                */
+               if (static_tls_align < align) {
+                       static_tls_align_offset += align - static_tls_align;
+                       static_tls_align = align;
+               }
+
+               /*
+                * Round up to the required alignment, taking into account
+                * the leading padding and TIB, then allocate the space.
+                */
+               offset = static_tls_align_offset + sizeof(struct tib) +
+                   static_tls_size;
+               offset = ELF_ROUND(offset, align) - static_tls_align_offset
+                   - sizeof(struct tib);
+               static_tls_size = offset + msize;
+       }
 #elif TLS_VARIANT == 2
+       /* Realignment is automatic for variant II */
+       if (static_tls_align < align)
+               static_tls_align = align;
+
        /*
-        * allocate the space, then round up to the alignment
-        * (these are negative offsets, so rounding up really rounds the
-        * address down)
+        * Variant 2 places the data before the TIB so we need to round up
+        * the size to the TLS data alignment and the TIB's alignment.
+        * Example A: p_memsz=24 p_align=16 align(TIB)=8
+        * - need to allocate 32 bytes for TLS as compiler
+        * - will give the first TLS symbol an offset of -32
+        * Example B: p_memsz=4 p_align=4 align(TIB)=8
+        * - need to allocate 8 bytes so that the TIB is
+        * - properly aligned
+        * So: allocate the space, then round up to the alignment
+        * (these are negative offsets, so rounding up really
+        * rounds the address down)
         */
        static_tls_size = ELF_ROUND(static_tls_size + msize, align);
        offset = static_tls_size;
@@ -166,15 +235,24 @@ _dl_allocate_tls_offsets(void)
 {
        struct elf_object *obj;
 
+       static_tls_align = TIB_ALIGN;
        for (obj = _dl_objects; obj != NULL; obj = obj->next) {
                if (obj->tls_msize != 0) {
                        obj->tls_offset = allocate_tls_offset(obj->tls_msize,
-                           obj->tls_align);
+                           obj->tls_align, obj->obj_type == OBJTYPE_EXE);
                }
        }
 
+#if TLS_VARIANT == 2
+       static_tls_align_offset = ELF_ROUND(static_tls_size, static_tls_align)
+           - static_tls_size;
+#endif
+
        /* no more static TLS allocations after this */
        _dl_tib_static_done = 1;
+
+       DL_DEB(("static tls size=%x align=%x offset=%x\n",
+           static_tls_size, static_tls_align, static_tls_align_offset));
 }
 
 /*
index 9820fb3..260817d 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: util.h,v 1.31 2017/08/29 15:25:51 deraadt Exp $       */
+/*     $OpenBSD: util.h,v 1.32 2017/12/01 23:30:05 guenther Exp $      */
 
 /*
  * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
@@ -41,6 +41,7 @@ void *_dl_calloc(size_t nmemb, const size_t size);
 void *_dl_realloc(void *, size_t size);
 void *_dl_reallocarray(void *, size_t nmemb, size_t size);
 void _dl_free(void *);
+void *_dl_aligned_alloc(size_t _alignment, size_t _size);
 char *_dl_strdup(const char *);
 size_t _dl_strlen(const char *);
 size_t _dl_strlcat(char *dst, const char *src, size_t siz);