From c31aa7c2c5b9bbd839a954f05d7fd1e547476e82 Mon Sep 17 00:00:00 2001 From: dlg Date: Thu, 27 Apr 2017 00:02:03 +0000 Subject: [PATCH] add support for -msave-args in gcc on amd64 i got sick of not having arguments in ddb stack traces on amd64, which is because amd64 passes arguments in registers, and it's impossible to figure out where they go without dwarf info, and when you have dwarf info it is complicated. solaris has a simple solution for this. they tweaked their compilers to accept an -msave-args option which makes functions store their arguments on the stack, while maintaining compatibility with the System V AMD64 ABI. tools (eg, ddb) can then look at the stack to get access to function arguments in traces. this ports their changes to gcc 3 to our gcc. ok deraadt@ --- gnu/gcc/gcc/config/i386/i386.c | 119 +++++++++++++++++++++++++++---- gnu/gcc/gcc/config/i386/i386.opt | 4 ++ gnu/gcc/gcc/doc/invoke.texi | 4 ++ gnu/gcc/gcc/dwarf2.h | 2 + gnu/gcc/gcc/dwarf2out.c | 4 ++ share/man/man1/gcc-local.1 | 12 +++- 6 files changed, 131 insertions(+), 14 deletions(-) diff --git a/gnu/gcc/gcc/config/i386/i386.c b/gnu/gcc/gcc/config/i386/i386.c index 6f77b9ba5a7..e1f07778881 100644 --- a/gnu/gcc/gcc/config/i386/i386.c +++ b/gnu/gcc/gcc/config/i386/i386.c @@ -997,6 +997,10 @@ struct stack_local_entry GTY(()) saved frame pointer if frame_pointer_needed <- HARD_FRAME_POINTER + [-msave-args] + + [padding0] + [saved regs] [padding1] \ @@ -1009,6 +1013,8 @@ struct stack_local_entry GTY(()) */ struct ix86_frame { + int nmsave_args; + int padding0; int nregs; int padding1; int va_arg_size; @@ -1164,6 +1170,7 @@ static const char *ix86_mangle_fundamental_type (tree); static tree ix86_stack_protect_fail (void); static rtx ix86_internal_arg_pointer (void); static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); +static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int); /* This function is only used on Solaris. 
*/ static void i386_solaris_elf_named_section (const char *, unsigned int, tree) @@ -1897,6 +1904,8 @@ override_options (void) } else { + if (TARGET_SAVE_ARGS) + error ("-msave-args makes no sense in the 32-bit mode"); /* i386 ABI does not specify red zone. It still makes sense to use it when programmer takes care to stack from being destroyed. */ if (!(target_flags_explicit & MASK_NO_RED_ZONE)) @@ -4798,7 +4807,7 @@ ix86_can_use_return_insn_p (void) return 0; ix86_compute_frame_layout (&frame); - return frame.to_allocate == 0 && frame.nregs == 0; + return frame.to_allocate == 0 && frame.nmsave_args == 0 && frame.nregs == 0; } /* Value should be nonzero if functions must have frame pointers. @@ -4818,6 +4827,9 @@ ix86_frame_pointer_required (void) if (SUBTARGET_FRAME_POINTER_REQUIRED) return 1; + if (TARGET_SAVE_ARGS) + return 1; + /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off the frame pointer by default. Turn it back on now if we've not got a leaf function. */ @@ -5075,6 +5087,18 @@ ix86_nsaved_regs (void) return nregs; } +/* Return number of arguments to be saved on the stack with + -msave-args. */ + +static int +ix86_nsaved_args (void) +{ + if (TARGET_SAVE_ARGS) + return current_function_args_info.regno - current_function_returns_struct; + else + return 0; +} + /* Return the offset between two registers, one to be eliminated, and the other its replacement, at the start of a routine. */ @@ -5114,6 +5138,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) frame->local_size = size; frame->nregs = ix86_nsaved_regs (); + frame->nmsave_args = ix86_nsaved_args (); total_size = size; stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; @@ -5155,6 +5180,11 @@ ix86_compute_frame_layout (struct ix86_frame *frame) else frame->save_regs_using_mov = false; + if (TARGET_SAVE_ARGS) + { + cfun->machine->use_fast_prologue_epilogue = true; + frame->save_regs_using_mov = true; + } /* Skip return address and saved base pointer. 
*/ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; @@ -5174,6 +5204,16 @@ ix86_compute_frame_layout (struct ix86_frame *frame) if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; + /* Argument save area */ + if (TARGET_SAVE_ARGS) + { + offset += frame->nmsave_args * UNITS_PER_WORD; + frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD; + offset += frame->padding0; + } + else + frame->padding0 = 0; + /* Register save area */ offset += frame->nregs * UNITS_PER_WORD; @@ -5231,8 +5271,10 @@ ix86_compute_frame_layout (struct ix86_frame *frame) (size + frame->padding1 + frame->padding2 + frame->outgoing_arguments_size + frame->va_arg_size); - if ((!frame->to_allocate && frame->nregs <= 1) - || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) + if (!TARGET_SAVE_ARGS + && ((!frame->to_allocate && frame->nregs <= 1) + || (TARGET_64BIT + && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))) frame->save_regs_using_mov = false; if (TARGET_RED_ZONE && current_function_sp_is_unchanging @@ -5241,7 +5283,11 @@ ix86_compute_frame_layout (struct ix86_frame *frame) { frame->red_zone_size = frame->to_allocate; if (frame->save_regs_using_mov) - frame->red_zone_size += frame->nregs * UNITS_PER_WORD; + { + frame->red_zone_size + += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD; + frame->red_zone_size += frame->padding0; + } if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; } @@ -5250,6 +5296,8 @@ ix86_compute_frame_layout (struct ix86_frame *frame) frame->to_allocate -= frame->red_zone_size; frame->stack_pointer_offset -= frame->red_zone_size; #if 0 + fprintf (stderr, "nmsave_args: %i\n", frame->nmsave_args); + fprintf (stderr, "padding0: %i\n", frame->padding0); fprintf (stderr, "nregs: %i\n", frame->nregs); fprintf (stderr, "size: %i\n", size); fprintf (stderr, "alignment1: %i\n", 
stack_alignment_needed); @@ -5273,6 +5321,22 @@ ix86_emit_save_regs (void) unsigned int regno; rtx insn; + if (TARGET_SAVE_ARGS) + { + int i; + int nsaved = ix86_nsaved_args (); + int start = cfun->returns_struct; + for (i = start; i < start + nsaved; i++) + { + regno = x86_64_int_parameter_registers[i]; + insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (nsaved % 2 != 0) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-UNITS_PER_WORD), -1); + } + for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) if (ix86_save_reg (regno, true)) { @@ -5298,6 +5362,25 @@ ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) RTX_FRAME_RELATED_P (insn) = 1; offset += UNITS_PER_WORD; } + + if (TARGET_SAVE_ARGS) + { + int i; + int nsaved = ix86_nsaved_args (); + int start = cfun->returns_struct; + if (nsaved % 2 != 0) + offset += UNITS_PER_WORD; + for (i = start + nsaved - 1; i >= start; i--) + { + regno = x86_64_int_parameter_registers[i]; + insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), + Pmode, offset), + gen_rtx_REG (Pmode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + offset += UNITS_PER_WORD; + } + } + } /* Expand prologue or epilogue stack adjustment. @@ -5470,14 +5553,16 @@ ix86_expand_prologue (void) if (!frame.save_regs_using_mov) ix86_emit_save_regs (); else - allocate += frame.nregs * UNITS_PER_WORD; + allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD + + frame.padding0; /* When using red zone we may start register saving before allocating the stack frame saving one cycle of the prologue. */ if (TARGET_RED_ZONE && frame.save_regs_using_mov) ix86_emit_save_regs_using_mov (frame_pointer_needed ? 
hard_frame_pointer_rtx : stack_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); + -(frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); if (allocate == 0) ; @@ -5514,7 +5599,8 @@ ix86_expand_prologue (void) t = plus_constant (hard_frame_pointer_rtx, allocate - frame.to_allocate - - frame.nregs * UNITS_PER_WORD); + - (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); else t = plus_constant (stack_pointer_rtx, allocate); emit_move_insn (eax, gen_rtx_MEM (SImode, t)); @@ -5523,11 +5609,13 @@ ix86_expand_prologue (void) if (frame.save_regs_using_mov && !TARGET_RED_ZONE) { - if (!frame_pointer_needed || !frame.to_allocate) + if (!TARGET_SAVE_ARGS && + (!frame_pointer_needed || !frame.to_allocate)) ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); else ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); + -(frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD - frame.padding0); } pic_reg_used = false; @@ -5611,10 +5699,11 @@ ix86_expand_epilogue (int style) must be taken for the normal return case of a function using eh_return: the eax and edx registers are marked as saved, but not restored along this path. 
*/ - offset = frame.nregs; + offset = frame.nregs + frame.nmsave_args; if (current_function_calls_eh_return && style != 2) offset -= 2; offset *= -UNITS_PER_WORD; + offset -= frame.padding0; /* If we're only restoring one register and sp is not valid then using a move instruction to restore the register since it's @@ -5670,14 +5759,16 @@ ix86_expand_epilogue (int style) { tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); tmp = plus_constant (tmp, (frame.to_allocate - + frame.nregs * UNITS_PER_WORD)); + + (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD + frame.padding0)); emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); } } else if (!frame_pointer_needed) pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (frame.to_allocate - + frame.nregs * UNITS_PER_WORD), + + (frame.nregs + frame.nmsave_args) + * UNITS_PER_WORD + frame.padding0), style); /* If not an i386, mov & pop is faster than "leave". */ else if (TARGET_USE_LEAVE || optimize_size @@ -5717,6 +5808,10 @@ ix86_expand_epilogue (int style) else emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); } + if (frame.nmsave_args) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame.nmsave_args * UNITS_PER_WORD + + frame.padding0), style); if (frame_pointer_needed) { /* Leave results in shorter dependency chains on CPUs that are diff --git a/gnu/gcc/gcc/config/i386/i386.opt b/gnu/gcc/gcc/config/i386/i386.opt index 2922f5d2fe0..152db6cacf0 100644 --- a/gnu/gcc/gcc/config/i386/i386.opt +++ b/gnu/gcc/gcc/config/i386/i386.opt @@ -221,6 +221,10 @@ mtls-direct-seg-refs Target Report Mask(TLS_DIRECT_SEG_REFS) Use direct references against %gs when accessing tls data +msave-args +Target Report Mask(SAVE_ARGS) +Save integer arguments on the stack at function entry + mtune= Target RejectNegative Joined Var(ix86_tune_string) Schedule code for given CPU diff --git a/gnu/gcc/gcc/doc/invoke.texi b/gnu/gcc/gcc/doc/invoke.texi index 60bc49cb343..aa2eeb8ff43 100644 --- 
a/gnu/gcc/gcc/doc/invoke.texi +++ b/gnu/gcc/gcc/doc/invoke.texi @@ -9806,6 +9806,10 @@ building of shared libraries are not supported with the medium model. Generate code for the large model: This model makes no assumptions about addresses and sizes of sections. Currently GCC does not implement this model. + +@item -msave-args +@opindex msave-args +Save integer arguments on the stack at function entry. @end table @node IA-64 Options diff --git a/gnu/gcc/gcc/dwarf2.h b/gnu/gcc/gcc/dwarf2.h index 31188372787..642858d5928 100644 --- a/gnu/gcc/gcc/dwarf2.h +++ b/gnu/gcc/gcc/dwarf2.h @@ -371,6 +371,8 @@ enum dwarf_attribute DW_AT_GNU_vector = 0x2107, /* VMS extensions. */ DW_AT_VMS_rtnbeg_pd_address = 0x2201, + /* Sun extension. */ + DW_AT_SUN_amd64_parmdump = 0x2224, /* UPC extension. */ DW_AT_upc_threads_scaled = 0x3210, /* PGI (STMicroelectronics) extensions. */ diff --git a/gnu/gcc/gcc/dwarf2out.c b/gnu/gcc/gcc/dwarf2out.c index 30bb90f1b5b..7013eead14e 100644 --- a/gnu/gcc/gcc/dwarf2out.c +++ b/gnu/gcc/gcc/dwarf2out.c @@ -11960,6 +11960,10 @@ gen_subprogram_die (tree decl, dw_die_ref context_die) /* Add the calling convention attribute if requested. */ add_calling_convention_attribute (subr_die, TREE_TYPE (decl)); +#ifdef TARGET_SAVE_ARGS + if (TARGET_SAVE_ARGS) + add_AT_flag (subr_die, DW_AT_SUN_amd64_parmdump, 1); +#endif } /* Generate a DIE to represent a declared data object. */ diff --git a/share/man/man1/gcc-local.1 b/share/man/man1/gcc-local.1 index 83e16db1717..63729a177c1 100644 --- a/share/man/man1/gcc-local.1 +++ b/share/man/man1/gcc-local.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: gcc-local.1,v 1.51 2017/02/14 12:50:15 jmc Exp $ +.\" $OpenBSD: gcc-local.1,v 1.52 2017/04/27 00:02:03 dlg Exp $ .\" .\" Copyright (c) 2002 Marc Espie .\" Copyright (c) 2003 Anil Madhavapeddy @@ -25,7 +25,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.\" -.Dd $Mdocdate: February 14 2017 $ +.Dd $Mdocdate: April 27 2017 $ .Dt GCC-LOCAL 1 .Os .Sh NAME @@ -160,6 +160,14 @@ which includes additional functions to be protected \(em those that have local array definitions or have references to local frame addresses. .It +On amd64, +.Fl msave-args +can be passed to the compiler to have functions save their register +arguments on the stack, while maintaining compatibility with the +System V AMD64 ABI. +This enables tools and debuggers that understand this semantic to +trivially generate stack traces that include function arguments. +.It On the alpha, amd64, arm, hppa, i386, mips64, powerpc, sh and sparc64 architectures, .Nm gcc -- 2.20.1