From 840df46fd94f20eb816a52fce31f3e76ee2f59fc Mon Sep 17 00:00:00 2001 From: jasper Date: Fri, 3 Sep 2021 16:45:44 +0000 Subject: [PATCH] add kprobes provider for dt this allows us to dynamically trace function boundaries with btrace by patching prologues and epilogues with a breakpoint upon which the handler records the data, sends it back to userland for btrace to consume. currently it's hidden behind DDBPROF, and there is still a lot to cleanup and improve, but basic scripts that observe return codes from a probed function work. from Tom Rollet, with various changes by me feedback and ok mpi@ --- sys/arch/amd64/amd64/vector.S | 32 ++- sys/arch/i386/i386/locore.s | 60 ++++- sys/conf/files | 3 +- sys/ddb/db_prof.c | 168 ++---------- sys/dev/dt/dt_dev.c | 43 +++- sys/dev/dt/dt_prov_kprobe.c | 464 ++++++++++++++++++++++++++++++++++ sys/dev/dt/dt_prov_profile.c | 14 +- sys/dev/dt/dt_prov_static.c | 14 +- sys/dev/dt/dt_prov_syscall.c | 12 +- sys/dev/dt/dtvar.h | 19 +- sys/kern/subr_prof.c | 6 +- usr.sbin/btrace/btrace.c | 13 +- 12 files changed, 650 insertions(+), 198 deletions(-) create mode 100644 sys/dev/dt/dt_prov_kprobe.c diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S index dd2dfde3e3b..cc8625bfe33 100644 --- a/sys/arch/amd64/amd64/vector.S +++ b/sys/arch/amd64/amd64/vector.S @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.S,v 1.84 2020/11/13 05:32:08 guenther Exp $ */ +/* $OpenBSD: vector.S,v 1.85 2021/09/03 16:45:44 jasper Exp $ */ /* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */ /* @@ -188,10 +188,11 @@ INTRENTRY_LABEL(trap03): sti cld SMAP_CLAC - movq %rsp, %rdi - call _C_LABEL(db_prof_hook) - cmpl $1, %eax - jne .Lreal_kern_trap + leaq _C_LABEL(dt_prov_kprobe), %rdi + movq %rsp, %rsi + call _C_LABEL(dt_prov_kprobe_hook) + cmpl $0, %eax + je .Lreal_kern_trap cli movq TF_RDI(%rsp),%rdi @@ -210,6 +211,11 @@ INTRENTRY_LABEL(trap03): movq TF_R11(%rsp),%r11 /* %rax restored below, after being used to shift the stack */ + cmpl $2, %eax 
+ je .Lemulate_ret + +.Lemulate_push_rbp: + /* * We are returning from a probe trap so we need to fix the * stack layout and emulate the patched instruction. @@ -217,6 +223,9 @@ INTRENTRY_LABEL(trap03): */ subq $16, %rsp + movq (TF_RAX + 16)(%rsp), %rax + movq %rax, TF_RAX(%rsp) + /* Shift hardware-saved registers. */ movq (TF_RIP + 16)(%rsp), %rax movq %rax, TF_RIP(%rsp) @@ -237,7 +246,20 @@ INTRENTRY_LABEL(trap03): /* Finally restore %rax */ movq (TF_RAX + 16)(%rsp),%rax + jmp .ret_int3 + +.Lemulate_ret: + + /* Store a new return address in %rip */ + movq TF_RSP(%rsp), %rax + movq (%rax), %rax + movq %rax, TF_RIP(%rsp) + addq $8, TF_RSP(%rsp) + + /* Finally restore %rax */ + movq (TF_RAX)(%rsp),%rax +.ret_int3: addq $TF_RIP,%rsp iretq #endif /* !defined(GPROF) && defined(DDBPROF) */ diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index c51416aacd8..c7c04dc8c7b 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.192 2021/09/02 12:32:22 jasper Exp $ */ +/* $OpenBSD: locore.s,v 1.193 2021/09/03 16:45:44 jasper Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -205,7 +205,8 @@ INTRENTRY_LABEL(label): /* from kernel */ ; \ #define INTRFASTEXIT \ jmp intr_fast_exit -#define INTR_FAKE_TRAP 0xbadabada +#define INTR_FAKE_TRAP_PUSH_RPB 0xbadabada +#define INTR_FAKE_TRAP_POP_RBP 0xbcbcbcbc /* * PTmap is recursive pagemap at top of virtual address space. @@ -1259,17 +1260,32 @@ calltrap: jne .Lreal_trap pushl %esp - call _C_LABEL(db_prof_hook) - addl $4,%esp - cmpl $1,%eax - jne .Lreal_trap + subl $4, %esp + pushl %eax + leal _C_LABEL(dt_prov_kprobe), %eax + movl %eax, 4(%esp) + popl %eax + call _C_LABEL(dt_prov_kprobe_hook) + addl $8, %esp + cmpl $0, %eax + je .Lreal_trap /* * Abuse the error field to indicate that INTRFASTEXIT needs * to emulate the patched instruction. 
*/ - movl $INTR_FAKE_TRAP, TF_ERR(%esp) - jz .Lalltraps_check_asts + cmpl $1, %eax + je .Lset_emulate_push_rbp + + cmpl $2, %eax + je .Lset_emulate_ret + +.Lset_emulate_push_rbp: + movl $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp) + jmp .Lalltraps_check_asts +.Lset_emulate_ret: + movl $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp) + jmp .Lalltraps_check_asts .Lreal_trap: #endif /* !defined(GPROF) && defined(DDBPROF) */ pushl %esp @@ -1298,8 +1314,10 @@ calltrap: * The code below does that by trashing %eax, so it MUST be * restored afterward. */ - cmpl $INTR_FAKE_TRAP, TF_ERR(%esp) - je .Lprobe_fixup + cmpl $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp) + je .Lprobe_fixup_push_rbp + cmpl $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp) + je .Lprobe_fixup_pop_rbp #endif /* !defined(GPROF) && defined(DDBPROF) */ #ifndef DIAGNOSTIC INTRFASTEXIT @@ -1327,7 +1345,7 @@ spl_lowered: .text #if !defined(GPROF) && defined(DDBPROF) -.Lprobe_fixup: +.Lprobe_fixup_push_rbp: /* Restore all register unwinding the stack. */ INTR_RESTORE_ALL @@ -1352,6 +1370,26 @@ spl_lowered: popl %eax iret +.Lprobe_fixup_pop_rbp: + /* Restore all register unwinding the stack. 
*/ + INTR_RESTORE_ALL + + movl %eax, 0(%esp) + + /* pop %ebp */ + movl 20(%esp), %ebp + /* Shift hardware-saved registers: eflags, cs, eip */ + movl 16(%esp), %eax + movl %eax, 20(%esp) + movl 12(%esp), %eax + movl %eax, 16(%esp) + movl 8(%esp), %eax + movl %eax, 12(%esp) + + /* Pop eax and restore the stack pointer */ + popl %eax + addl $8, %esp + iret #endif /* !defined(GPROF) && defined(DDBPROF) */ .text diff --git a/sys/conf/files b/sys/conf/files index 8ec908f3918..23722bc0b64 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $OpenBSD: files,v 1.703 2021/06/29 01:42:14 jsg Exp $ +# $OpenBSD: files,v 1.704 2021/09/03 16:45:45 jasper Exp $ # $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -632,6 +632,7 @@ file dev/dt/dt_dev.c dt needs-flag file dev/dt/dt_prov_profile.c dt file dev/dt/dt_prov_syscall.c dt file dev/dt/dt_prov_static.c dt +file dev/dt/dt_prov_kprobe.c dt # XXX machine-independent SCSI files should live somewhere here, maybe diff --git a/sys/ddb/db_prof.c b/sys/ddb/db_prof.c index 7d8190f41bc..fe77f927f65 100644 --- a/sys/ddb/db_prof.c +++ b/sys/ddb/db_prof.c @@ -1,20 +1,5 @@ -/* $OpenBSD: db_prof.c,v 1.4 2017/08/11 15:14:23 nayden Exp $ */ +/* $OpenBSD: db_prof.c,v 1.5 2021/09/03 16:45:45 jasper Exp $ */ -/* - * Copyright (c) 2016 Martin Pieuchot - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ /*- * Copyright (c) 1983, 1992, 1993 * The Regents of the University of California. All rights reserved. @@ -43,7 +28,6 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include #include #include @@ -54,173 +38,58 @@ #include #include -#include /* for db_write_bytes() */ #include -extern char etext[]; - -struct prof_probe { - const char *pp_name; - Elf_Sym *pp_symb; - SLIST_ENTRY(prof_probe) pp_next; - vaddr_t pp_inst; - int pp_on; -}; +#include "dt.h" /* for NDT */ -#define PPTSIZE PAGE_SIZE -#define PPTMASK ((PPTSIZE / sizeof(struct prof_probe)) - 1) -#define INSTTOIDX(inst) ((((unsigned long)(inst)) >> 4) & PPTMASK) -SLIST_HEAD(, prof_probe) *pp_table; +#if NDT > 0 +#include +#endif extern int db_profile; /* Allow dynamic profiling */ int db_prof_on; /* Profiling state On/Off */ -vaddr_t db_get_pc(struct trapframe *); -vaddr_t db_get_probe_addr(struct trapframe *); - -void db_prof_forall(Elf_Sym *, char *, char *, int, void *); -void db_prof_count(unsigned long, unsigned long); - -void -db_prof_init(void) -{ - unsigned long nentries; - - pp_table = malloc(PPTSIZE, M_TEMP, M_NOWAIT|M_ZERO); - if (pp_table == NULL) - return; - - db_elf_sym_forall(db_prof_forall, &nentries); - printf("ddb probe table references %lu entry points\n", nentries); -} - -void -db_prof_forall(Elf_Sym *sym, char *name, char *suff, int pre, void *xarg) -{ - Elf_Sym *symb = sym; - unsigned long *nentries = xarg; - struct prof_probe *pp; - vaddr_t inst; - - if (ELF_ST_TYPE(symb->st_info) != STT_FUNC) - return; - - inst = symb->st_value; - if (inst < KERNBASE || inst >= (vaddr_t)&etext) - return; - - if 
(*((uint8_t *)inst) != SSF_INST) - return; - - if (strncmp(name, "db_", 3) == 0 || strncmp(name, "trap", 4) == 0) - return; - -#ifdef __i386__ - /* Avoid a recursion in db_write_text(). */ - if (strncmp(name, "pmap_pte", 8) == 0) - return; -#endif - - pp = malloc(sizeof(struct prof_probe), M_TEMP, M_NOWAIT|M_ZERO); - if (pp == NULL) - return; - - pp->pp_name = name; - pp->pp_inst = inst; - pp->pp_symb = symb; - - SLIST_INSERT_HEAD(&pp_table[INSTTOIDX(pp->pp_inst)], pp, pp_next); +void dt_prov_kprobe_patch_all_entry(void); +void dt_prov_kprobe_depatch_all_entry(void); - (*nentries)++; -} +vaddr_t db_get_probe_addr(struct trapframe *); +vaddr_t db_get_pc(struct trapframe *); int db_prof_enable(void) { -#if defined(__amd64__) || defined(__i386__) - struct prof_probe *pp; - uint8_t patch = BKPT_INST; - unsigned long s; - int i; - +#if NDT > 0 if (!db_profile) return EPERM; - if (pp_table == NULL) - return ENOENT; - - KASSERT(BKPT_SIZE == SSF_SIZE); - - s = intr_disable(); - for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) { - SLIST_FOREACH(pp, &pp_table[i], pp_next) { - pp->pp_on = 1; - db_write_bytes(pp->pp_inst, BKPT_SIZE, &patch); - } - } - intr_restore(s); - + dt_prov_kprobe_patch_all_entry(); db_prof_on = 1; - return 0; #else return ENOENT; -#endif +#endif /* NDT > 0 */ } void db_prof_disable(void) { - struct prof_probe *pp; - uint8_t patch = SSF_INST; - unsigned long s; - int i; - +#if NDT > 0 db_prof_on = 0; - - s = intr_disable(); - for (i = 0; i < (PPTSIZE / sizeof(*pp)); i++) { - SLIST_FOREACH(pp, &pp_table[i], pp_next) { - db_write_bytes(pp->pp_inst, SSF_SIZE, &patch); - pp->pp_on = 0; - } - } - intr_restore(s); -} - -int -db_prof_hook(struct trapframe *frame) -{ - struct prof_probe *pp; - vaddr_t pc, inst; - - if (pp_table == NULL) - return 0; - - pc = db_get_pc(frame); - inst = db_get_probe_addr(frame); - - SLIST_FOREACH(pp, &pp_table[INSTTOIDX(inst)], pp_next) { - if (pp->pp_on && pp->pp_inst == inst) { - if (db_prof_on) - db_prof_count(pc, inst); - return 
1; - } - } - - return 0; + dt_prov_kprobe_depatch_all_entry(); +#endif /* NDT > 0 */ } /* * Equivalent to mcount(), must be called with interrupt disabled. */ void -db_prof_count(unsigned long frompc, unsigned long selfpc) +db_prof_count(struct trapframe *frame) { unsigned short *frompcindex; struct tostruct *top, *prevtop; struct gmonparam *p; long toindex; + unsigned long frompc, selfpc; if ((p = curcpu()->ci_gmon) == NULL) return; @@ -232,6 +101,9 @@ db_prof_count(unsigned long frompc, unsigned long selfpc) if (p->state != GMON_PROF_ON) return; + frompc = db_get_pc(frame); + selfpc = db_get_probe_addr(frame); + /* * check that frompcindex is a reasonable pc value. * for example: signal catchers get called from the stack, diff --git a/sys/dev/dt/dt_dev.c b/sys/dev/dt/dt_dev.c index 03f2658ecc9..7c781df0d0f 100644 --- a/sys/dev/dt/dt_dev.c +++ b/sys/dev/dt/dt_dev.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_dev.c,v 1.14 2021/05/22 21:25:38 bluhm Exp $ */ +/* $OpenBSD: dt_dev.c,v 1.15 2021/09/03 16:45:45 jasper Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -124,7 +124,7 @@ int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *); int dt_ioctl_record_start(struct dt_softc *); void dt_ioctl_record_stop(struct dt_softc *); int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *); -void dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *); +int dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *); int dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *); @@ -138,6 +138,9 @@ dtattach(struct device *parent, struct device *self, void *aux) dt_nprobes += dt_prov_profile_init(); dt_nprobes += dt_prov_syscall_init(); dt_nprobes += dt_prov_static_init(); +#ifdef DDBPROF + dt_nprobes += dt_prov_kprobe_init(); +#endif printf("dt: %u probes\n", dt_nprobes); } @@ -275,6 +278,7 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr); case DTIOCRECORD: case 
DTIOCPRBENABLE: + case DTIOCPRBDISABLE: /* root only ioctl(2) */ break; default: @@ -295,6 +299,9 @@ dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) case DTIOCPRBENABLE: error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr); break; + case DTIOCPRBDISABLE: + error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr); + break; default: KASSERT(0); } @@ -478,6 +485,35 @@ dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq) return 0; } +int +dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq) +{ + struct dt_probe *dtp; + int error; + + KASSERT(suser(curproc) == 0); + if (!dtioc_req_isvalid(dtrq)) + return EINVAL; + + SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { + if (dtp->dtp_pbn == dtrq->dtrq_pbn) + break; + } + if (dtp == NULL) + return ENOENT; + + if (dtp->dtp_prov->dtpv_dealloc) { + error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq); + if (error) + return error; + } + + DPRINTF("dt%d: pid %d dealloc\n", sc->ds_unit, sc->ds_pid, + dtrq->dtrq_pbn); + + return 0; +} + struct dt_probe * dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv) { @@ -492,6 +528,9 @@ dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv) dtp->dtp_func = func; dtp->dtp_name = name; dtp->dtp_sysnum = -1; + dtp->dtp_ref = 0; + + mtx_init(&dtp->dtp_mtx, IPL_HIGH); return dtp; } diff --git a/sys/dev/dt/dt_prov_kprobe.c b/sys/dev/dt/dt_prov_kprobe.c new file mode 100644 index 00000000000..f239ad15e9c --- /dev/null +++ b/sys/dev/dt/dt_prov_kprobe.c @@ -0,0 +1,464 @@ +/* $OpenBSD: dt_prov_kprobe.c,v 1.1 2021/09/03 16:45:45 jasper Exp $ */ + +/* + * Copyright (c) 2020 Tom Rollet + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#if defined(DDBPROF) && (defined(__amd64__) || defined(__i386__)) + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +int dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dt_pcb_list *plist, struct dtioc_req *dtrq); +int dt_prov_kprobe_hook(struct dt_provider *dtpv, ...); +int dt_prov_kprobe_dealloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dtioc_req *dtrq); + +void db_prof_count(struct trapframe *frame); + +struct kprobe_probe { + struct dt_probe* dtp; + SLIST_ENTRY(kprobe_probe) kprobe_next; +}; + +/* Bob Jenkin's public domain 32-bit integer hashing function. + * Original at https://burtleburtle.net/bob/hash/integer.html. 
+ */ +uint32_t +ptr_hash(uint32_t a) { + a = (a + 0x7ed55d16) + (a<<12); + a = (a ^ 0xc761c23c) ^ (a>>19); + a = (a + 0x165667b1) + (a<<5); + a = (a + 0xd3a2646c) ^ (a<<9); + a = (a + 0xfd7046c5) + (a<<3); + a = (a ^ 0xb55a4f09) ^ (a>>16); + return a; +} + +#define PPTSIZE PAGE_SIZE * 30 +#define PPTMASK ((PPTSIZE / sizeof(struct kprobe_probe)) - 1) +#define INSTTOIDX(inst) (ptr_hash(inst) & PPTMASK) + +SLIST_HEAD(, kprobe_probe) *dtpf_entry; +SLIST_HEAD(, kprobe_probe) *dtpf_return; +int nb_probes_entry = 0; +int nb_probes_return = 0; + +#define DTEVT_PROV_KPROBE (DTEVT_COMMON|DTEVT_FUNCARGS) + +#define KPROBE_ENTRY "entry" +#define KPROBE_RETURN "return" + +#if defined(__amd64__) +#define KPROBE_RETGUARD_MOV_1 0x4c +#define KPROBE_RETGUARD_MOV_2 0x8b +#define KPROBE_RETGUARD_MOV_3 0x1d + +#define KPROBE_RETGUARD_MOV_SIZE 7 + +#define KPROBE_RETGUARD_XOR_1 0x4c +#define KPROBE_RETGUARD_XOR_2 0x33 +#define KPROBE_RETGUARD_XOR_3 0x1c + +#define KPROBE_RETGUARD_XOR_SIZE 4 + +#define RET 0xc3 +#define RET_SIZE 1 +#elif defined(__i386__) +#define POP_RBP 0x5d +#define POP_RBP_SIZE 1 +#endif + +struct dt_provider dt_prov_kprobe = { + .dtpv_name = "kprobe", + .dtpv_alloc = dt_prov_kprobe_alloc, + .dtpv_enter = dt_prov_kprobe_hook, + .dtpv_leave = NULL, + .dtpv_dealloc = dt_prov_kprobe_dealloc, +}; + +extern db_symtab_t db_symtab; +extern char __kutext_end[]; +extern int db_prof_on; + +/* Initialize all entry and return probes and store them in global arrays */ +int +dt_prov_kprobe_init(void) +{ + struct dt_probe *dtp; + struct kprobe_probe *kprobe_dtp; + Elf_Sym *symp, *symtab_start, *symtab_end; + char *strtab, *name; + vaddr_t inst, limit; + int nb_sym, nb_probes; + + nb_sym = (db_symtab.end - db_symtab.start) / sizeof (Elf_Sym); + nb_probes = nb_probes_entry = nb_probes_return = 0; + + dtpf_entry = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO); + if (dtpf_entry == NULL) + goto end; + + dtpf_return = malloc(PPTSIZE, M_DT, M_NOWAIT|M_ZERO); + if (dtpf_return == NULL) + goto 
end; + + db_symtab_t *stab = &db_symtab; + + symtab_start = STAB_TO_SYMSTART(stab); + symtab_end = STAB_TO_SYMEND(stab); + + strtab = db_elf_find_strtab(stab); + + for (symp = symtab_start; symp < symtab_end; symp++) { + if (ELF_ST_TYPE(symp->st_info) != STT_FUNC) + continue; + + inst = symp->st_value; + name = strtab + symp->st_name; + limit = symp->st_value + symp->st_size; + + /* Filter function that are not mapped in memory */ + if (inst < KERNBASE || inst >= (vaddr_t)&__kutext_end) + continue; + + /* Remove some function to avoid recursive tracing */ + if (strncmp(name, "dt_", 3) == 0 || strncmp(name, "trap", 4) == 0 || + strncmp(name, "db_", 3) == 0) + continue; + +#if defined(__amd64__) + /* Find if there is a retguard, if so move the inst pointer to the later 'push rbp' */ + if (*((uint8_t *)inst) != SSF_INST) { + /* No retguards in i386 */ + if (((uint8_t *)inst)[0] != KPROBE_RETGUARD_MOV_1 || + ((uint8_t *)inst)[1] != KPROBE_RETGUARD_MOV_2 || + ((uint8_t *)inst)[2] != KPROBE_RETGUARD_MOV_3 || + ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE] != KPROBE_RETGUARD_XOR_1 || + ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 1] != KPROBE_RETGUARD_XOR_2 || + ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + 2] != KPROBE_RETGUARD_XOR_3 || + ((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + KPROBE_RETGUARD_XOR_SIZE] != SSF_INST) + continue; + inst = (vaddr_t)&(((uint8_t *)inst)[KPROBE_RETGUARD_MOV_SIZE + KPROBE_RETGUARD_XOR_SIZE]); + } +#elif defined(__i386__) + if (*((uint8_t *)inst) != SSF_INST) + continue; +#endif + + dtp = dt_dev_alloc_probe(name, KPROBE_ENTRY, &dt_prov_kprobe); + if (dtp == NULL) + goto end; + + kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, M_NOWAIT|M_ZERO); + if (kprobe_dtp == NULL) + goto end; + kprobe_dtp->dtp = dtp; + + dtp->dtp_addr = inst; + dtp->dtp_nargs = db_ctf_func_numargs(symp); + dt_dev_register_probe(dtp); + + SLIST_INSERT_HEAD(&dtpf_entry[INSTTOIDX(dtp->dtp_addr)], kprobe_dtp, kprobe_next); + + nb_probes++; + nb_probes_entry++; + + /* 
+ * Poor method to find the return point + * => we would need a disassembler to find all return points + * For now we start from the end of the function, iterate on + * int3 inserted for retguard until we find a ret + */ +#if defined(__amd64__) + if (*(uint8_t *)(limit - 1) != RET) + continue; + inst = limit - 1; +#elif defined(__i386__) + /* + * Little temporary hack to find some return probe + * => always int3 after 'pop %rbp; ret' + */ + while(*((uint8_t *)inst) == 0xcc) + (*(uint8_t *)inst) -= 1; + if (*(uint8_t *)(limit - 2) != POP_RBP) + continue; + inst = limit - 2; +#endif + + dtp = dt_dev_alloc_probe(name, KPROBE_RETURN, &dt_prov_kprobe); + if (dtp == NULL) + goto end; + + kprobe_dtp = malloc(sizeof(struct kprobe_probe), M_TEMP, M_NOWAIT|M_ZERO); + if (kprobe_dtp == NULL) + goto end; + kprobe_dtp->dtp = dtp; + + dtp->dtp_addr = inst; + dt_dev_register_probe(dtp); + SLIST_INSERT_HEAD(&dtpf_return[INSTTOIDX(dtp->dtp_addr)], kprobe_dtp, kprobe_next); + nb_probes++; + nb_probes_return++; + } +end: + return nb_probes; +} + +int +dt_prov_kprobe_alloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dt_pcb_list *plist, struct dtioc_req *dtrq) +{ + uint8_t patch = BKPT_INST; + struct dt_pcb *dp; + unsigned s; + + dp = dt_pcb_alloc(dtp, sc); + if (dp == NULL) + return ENOMEM; + + /* Patch only if it's first pcb referencing this probe */ + mtx_enter(&dtp->dtp_mtx); + dtp->dtp_ref++; + KASSERT(dtp->dtp_ref != 0); + + if (dtp->dtp_ref == 1) { + s = intr_disable(); + db_write_bytes(dtp->dtp_addr, BKPT_SIZE, &patch); + intr_restore(s); + } + mtx_leave(&dtp->dtp_mtx); + + dp->dp_filter = dtrq->dtrq_filter; + dp->dp_evtflags = dtrq->dtrq_evtflags & DTEVT_PROV_KPROBE; + TAILQ_INSERT_HEAD(plist, dp, dp_snext); + return 0; +} + +int +dt_prov_kprobe_dealloc(struct dt_probe *dtp, struct dt_softc *sc, + struct dtioc_req *dtrq) +{ + uint8_t patch; + int size; + unsigned s; + + if (strcmp(dtp->dtp_name, KPROBE_ENTRY) == 0) { + patch = SSF_INST; + size = SSF_SIZE; + } else if 
(strcmp(dtp->dtp_name, KPROBE_RETURN) == 0) { +#if defined(__amd64__) + patch = RET; + size = RET_SIZE; +#elif defined(__i386__) + patch = POP_RBP; + size = POP_RBP_SIZE; +#endif + } else + KASSERT(0 && "Trying to dealloc not yet implemented probe type"); + + mtx_enter(&dtp->dtp_mtx); + dtp->dtp_ref--; + + if (dtp->dtp_ref == 0) { + s = intr_disable(); + db_write_bytes(dtp->dtp_addr, size, &patch); + intr_restore(s); + } + + mtx_leave(&dtp->dtp_mtx); + + /* Deallocation of PCB is done by dt_pcb_purge when closing the dev */ + return 0; +} + +int +dt_prov_kprobe_hook(struct dt_provider *dtpv, ...) +{ + struct dt_probe *dtp; + struct dt_pcb *dp; + struct trapframe *tf; + struct kprobe_probe *kprobe_dtp; + va_list ap; + int is_dt_bkpt = 0; + int error; /* Return values for return probes*/ + vaddr_t *args; + size_t argsize; + register_t retval[2]; + + KASSERT(dtpv == &dt_prov_kprobe); + + va_start(ap, dtpv); + tf = va_arg(ap, struct trapframe*); + va_end(ap); + +#if defined(__amd64__) + vaddr_t addr = tf->tf_rip - BKPT_SIZE; +#elif defined(__i386) + vaddr_t addr = tf->tf_eip - BKPT_SIZE; +#endif + + SLIST_FOREACH(kprobe_dtp, &dtpf_entry[INSTTOIDX(addr)], kprobe_next) { + dtp = kprobe_dtp->dtp; + + if (dtp->dtp_addr != addr) + continue; + + is_dt_bkpt = 1; + if (db_prof_on) + db_prof_count(tf); + + if (!dtp->dtp_recording) + continue; + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { + struct dt_evt *dtev; + + dtev = dt_pcb_ring_get(dp, 0); + if (dtev == NULL) + continue; + +#if defined(__amd64__) + args = (vaddr_t *)tf->tf_rdi; + /* XXX: use CTF to get the number of arguments. 
*/ + argsize = 6; +#elif defined(__i386__) + /* All args on stack */ + args = (vaddr_t *)(tf->tf_esp + 4); + argsize = 10; +#endif + + if (ISSET(dp->dp_evtflags, DTEVT_FUNCARGS)) + memcpy(dtev->dtev_args, args, argsize); + + dt_pcb_ring_consume(dp, dtev); + } + smr_read_leave(); + } + + if (is_dt_bkpt) + return is_dt_bkpt; + + SLIST_FOREACH(kprobe_dtp, &dtpf_return[INSTTOIDX(addr)], kprobe_next) { + dtp = kprobe_dtp->dtp; + + if (dtp->dtp_addr != addr) + continue; + + is_dt_bkpt = 2; + + if (!dtp->dtp_recording) + continue; + + smr_read_enter(); + SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { + struct dt_evt *dtev; + + dtev = dt_pcb_ring_get(dp, 0); + if (dtev == NULL) + continue; + +#if defined(__amd64__) + retval[0] = tf->tf_rax; + retval[1] = 0; + error = 0; +#elif defined(__i386) + retval[0] = tf->tf_eax; + retval[1] = 0; + error = 0; +#endif + + dtev->dtev_retval[0] = retval[0]; + dtev->dtev_retval[1] = retval[1]; + dtev->dtev_error = error; + + dt_pcb_ring_consume(dp, dtev); + } + smr_read_leave(); + } + return is_dt_bkpt; +} + +/* Function called by ddb to patch all functions without allocating 1 pcb per probe */ +void +dt_prov_kprobe_patch_all_entry(void) +{ + uint8_t patch = BKPT_INST; + struct dt_probe *dtp; + struct kprobe_probe *kprobe_dtp; + size_t i; + + for (i = 0; i < PPTMASK; ++i) { + SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) { + dtp = kprobe_dtp->dtp; + + mtx_enter(&dtp->dtp_mtx); + dtp->dtp_ref++; + + if (dtp->dtp_ref == 1) { + unsigned s; + s = intr_disable(); + db_write_bytes(dtp->dtp_addr, BKPT_SIZE, &patch); + intr_restore(s); + } + + mtx_leave(&dtp->dtp_mtx); + } + } +} + +/* Function called by ddb to depatch all entry probes patched by dt_prov_kprobe_patch_all_entry() */ +void +dt_prov_kprobe_depatch_all_entry(void) +{ + uint8_t patch = SSF_INST; + struct dt_probe *dtp; + struct kprobe_probe *kprobe_dtp; + size_t i; + + for (i = 0; i < PPTMASK; ++i) { + SLIST_FOREACH(kprobe_dtp, &dtpf_entry[i], kprobe_next) { + dtp = kprobe_dtp->dtp; + + 
mtx_enter(&dtp->dtp_mtx); + dtp->dtp_ref--; + + if (dtp->dtp_ref == 0) { + unsigned s; + s = intr_disable(); + db_write_bytes(dtp->dtp_addr, SSF_SIZE, &patch); + intr_restore(s); + } + + mtx_leave(&dtp->dtp_mtx); + } + + } +} +#endif /* __amd64__ || __i386__ */ diff --git a/sys/dev/dt/dt_prov_profile.c b/sys/dev/dt/dt_prov_profile.c index 60d59af54f7..502e99828f3 100644 --- a/sys/dev/dt/dt_prov_profile.c +++ b/sys/dev/dt/dt_prov_profile.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_prov_profile.c,v 1.3 2020/06/26 13:11:23 mpi Exp $ */ +/* $OpenBSD: dt_prov_profile.c,v 1.4 2021/09/03 16:45:45 jasper Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -31,14 +31,15 @@ struct dt_probe *dtpp_interval; /* global periodic probe */ int dt_prov_profile_alloc(struct dt_probe *, struct dt_softc *, struct dt_pcb_list *, struct dtioc_req *); -void dt_prov_profile_enter(struct dt_provider *, ...); -void dt_prov_interval_enter(struct dt_provider *, ...); +int dt_prov_profile_enter(struct dt_provider *, ...); +int dt_prov_interval_enter(struct dt_provider *, ...); struct dt_provider dt_prov_profile = { .dtpv_name = "profile", .dtpv_alloc = dt_prov_profile_alloc, .dtpv_enter = dt_prov_profile_enter, .dtpv_leave = NULL, + .dtpv_dealloc = NULL, }; struct dt_provider dt_prov_interval = { @@ -46,6 +47,7 @@ struct dt_provider dt_prov_interval = { .dtpv_alloc = dt_prov_profile_alloc, .dtpv_enter = dt_prov_interval_enter, .dtpv_leave = NULL, + .dtpv_dealloc = NULL, }; int @@ -114,7 +116,7 @@ dt_prov_profile_fire(struct dt_pcb *dp) dp->dp_nticks = 0; } -void +int dt_prov_profile_enter(struct dt_provider *dtpv, ...) { struct cpu_info *ci = curcpu(); @@ -130,9 +132,10 @@ dt_prov_profile_enter(struct dt_provider *dtpv, ...) dt_prov_profile_fire(dp); } smr_read_leave(); + return 0; } -void +int dt_prov_interval_enter(struct dt_provider *dtpv, ...) { struct dt_pcb *dp; @@ -144,4 +147,5 @@ dt_prov_interval_enter(struct dt_provider *dtpv, ...) 
dt_prov_profile_fire(dp); } smr_read_leave(); + return 0; } diff --git a/sys/dev/dt/dt_prov_static.c b/sys/dev/dt/dt_prov_static.c index 789c2246e14..9c052abf731 100644 --- a/sys/dev/dt/dt_prov_static.c +++ b/sys/dev/dt/dt_prov_static.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_prov_static.c,v 1.9 2021/09/02 17:21:39 jasper Exp $ */ +/* $OpenBSD: dt_prov_static.c,v 1.10 2021/09/03 16:45:45 jasper Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -25,12 +25,13 @@ int dt_prov_static_alloc(struct dt_probe *, struct dt_softc *, struct dt_pcb_list *, struct dtioc_req *); -void dt_prov_static_hook(struct dt_provider *, ...); +int dt_prov_static_hook(struct dt_provider *, ...); struct dt_provider dt_prov_static = { - .dtpv_name = "tracepoint", - .dtpv_alloc = dt_prov_static_alloc, - .dtpv_enter = dt_prov_static_hook, + .dtpv_name = "tracepoint", + .dtpv_alloc = dt_prov_static_alloc, + .dtpv_enter = dt_prov_static_hook, + .dtpv_dealloc = NULL, }; /* @@ -137,7 +138,7 @@ dt_prov_static_alloc(struct dt_probe *dtp, struct dt_softc *sc, return 0; } -void +int dt_prov_static_hook(struct dt_provider *dtpv, ...) { struct dt_probe *dtp; @@ -172,4 +173,5 @@ dt_prov_static_hook(struct dt_provider *dtpv, ...) 
dt_pcb_ring_consume(dp, dtev); } smr_read_leave(); + return 1; } diff --git a/sys/dev/dt/dt_prov_syscall.c b/sys/dev/dt/dt_prov_syscall.c index 0db59ef1c9a..33822dade1b 100644 --- a/sys/dev/dt/dt_prov_syscall.c +++ b/sys/dev/dt/dt_prov_syscall.c @@ -1,4 +1,4 @@ -/* $OpenBSD: dt_prov_syscall.c,v 1.5 2021/09/02 17:21:39 jasper Exp $ */ +/* $OpenBSD: dt_prov_syscall.c,v 1.6 2021/09/03 16:45:45 jasper Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -37,7 +37,7 @@ unsigned int dtps_nsysent = SYS_MAXSYSCALL; int dt_prov_syscall_alloc(struct dt_probe *, struct dt_softc *, struct dt_pcb_list *, struct dtioc_req *); -void dt_prov_syscall_entry(struct dt_provider *, ...); +int dt_prov_syscall_entry(struct dt_provider *, ...); void dt_prov_syscall_return(struct dt_provider *, ...); struct dt_provider dt_prov_syscall = { @@ -45,6 +45,7 @@ struct dt_provider dt_prov_syscall = { .dtpv_alloc = dt_prov_syscall_alloc, .dtpv_enter = dt_prov_syscall_entry, .dtpv_leave = dt_prov_syscall_return, + .dtpv_dealloc = NULL, }; int @@ -119,7 +120,7 @@ dt_prov_syscall_alloc(struct dt_probe *dtp, struct dt_softc *sc, return 0; } -void +int dt_prov_syscall_entry(struct dt_provider *dtpv, ...) { struct dt_probe *dtp; @@ -139,11 +140,11 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...) KASSERT((argsize / sizeof(register_t)) <= DTMAXFUNCARGS); if (sysnum < 0 || sysnum >= dtps_nsysent) - return; + return 0; dtp = dtps_entry[sysnum]; if (!dtp->dtp_recording) - return; + return 0; smr_read_enter(); SMR_SLIST_FOREACH(dp, &dtp->dtp_pcbs, dp_pnext) { @@ -159,6 +160,7 @@ dt_prov_syscall_entry(struct dt_provider *dtpv, ...) 
dt_pcb_ring_consume(dp, dtev); } smr_read_leave(); + return 0; } void diff --git a/sys/dev/dt/dtvar.h b/sys/dev/dt/dtvar.h index 83ad3d22923..db47cfd4ea5 100644 --- a/sys/dev/dt/dtvar.h +++ b/sys/dev/dt/dtvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: dtvar.h,v 1.7 2021/09/02 19:41:48 jasper Exp $ */ +/* $OpenBSD: dtvar.h,v 1.8 2021/09/03 16:45:45 jasper Exp $ */ /* * Copyright (c) 2019 Martin Pieuchot @@ -63,7 +63,6 @@ struct dt_evt { #define dtev_args _args.E_entry /* function args. */ #define dtev_retval _args.E_return.__retval /* function retval */ #define dtev_error _args.E_return.__error /* function error */ - }; /* @@ -132,6 +131,7 @@ struct dtioc_stat { #define DTIOCRECORD _IOW('D', 3, int) #define DTIOCPRBENABLE _IOW('D', 4, struct dtioc_req) +#define DTIOCPRBDISABLE _IOW('D', 5, struct dtioc_req) #ifdef _KERNEL @@ -205,6 +205,7 @@ void dt_pcb_ring_consume(struct dt_pcb *, struct dt_evt *); * K kernel lock * D dt_lock * D,S dt_lock for writting and SMR for reading + * M dtp mutex */ struct dt_probe { SIMPLEQ_ENTRY(dt_probe) dtp_next; /* [K] global list of probes */ @@ -213,12 +214,15 @@ struct dt_probe { const char *dtp_func; /* [I] probe function */ const char *dtp_name; /* [I] probe name */ uint32_t dtp_pbn; /* [I] unique ID */ - volatile uint32_t dtp_recording; /* [D] is it recording? */ - uint8_t dtp_nargs; /* [I] # of arguments */ + volatile uint32_t dtp_recording; /* [d] is it recording? */ + struct mutex dtp_mtx; + unsigned dtp_ref; /* [m] # of PCBs referencing the probe */ /* Provider specific fields. 
*/ int dtp_sysnum; /* [I] related # of syscall */ const char *dtp_argtype[5];/* [I] type of arguments */ + int dtp_nargs; /* [I] # of arguments */ + vaddr_t dtp_addr; /* [I] address of breakpoint */ }; @@ -231,13 +235,18 @@ struct dt_provider { int (*dtpv_alloc)(struct dt_probe *, struct dt_softc *, struct dt_pcb_list *, struct dtioc_req *); - void (*dtpv_enter)(struct dt_provider *, ...); + int (*dtpv_enter)(struct dt_provider *, ...); void (*dtpv_leave)(struct dt_provider *, ...); + int (*dtpv_dealloc)(struct dt_probe *, struct dt_softc *, + struct dtioc_req *); }; +extern struct dt_provider dt_prov_kprobe; + int dt_prov_profile_init(void); int dt_prov_syscall_init(void); int dt_prov_static_init(void); +int dt_prov_kprobe_init(void); struct dt_probe *dt_dev_alloc_probe(const char *, const char *, struct dt_provider *); diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index 1de0726ab95..344af4e1c72 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_prof.c,v 1.30 2016/09/04 09:22:29 mpi Exp $ */ +/* $OpenBSD: subr_prof.c,v 1.31 2021/09/03 16:45:45 jasper Exp $ */ /* $NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $ */ /*- @@ -70,10 +70,6 @@ prof_init(void) char *cp; int size; -#if !defined(GPROF) && defined(DDBPROF) - db_prof_init(); -#endif - /* * Round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. 
diff --git a/usr.sbin/btrace/btrace.c b/usr.sbin/btrace/btrace.c index b4bb8864e07..2530126271a 100644 --- a/usr.sbin/btrace/btrace.c +++ b/usr.sbin/btrace/btrace.c @@ -1,4 +1,4 @@ -/* $OpenBSD: btrace.c,v 1.46 2021/09/02 17:21:39 jasper Exp $ */ +/* $OpenBSD: btrace.c,v 1.47 2021/09/03 16:45:44 jasper Exp $ */ /* * Copyright (c) 2019 - 2021 Martin Pieuchot @@ -503,15 +503,18 @@ rules_teardown(int fd) } TAILQ_FOREACH(r, &g_rules, br_next) { + dtrq = r->br_cookie; if (r->br_type != B_RT_PROBE) { if (r->br_type == B_RT_END) rend = r; continue; - } + } else { + if (ioctl(fd, DTIOCPRBDISABLE, dtrq)) + err(1, "DTIOCPRBDISABLE"); + } - dtrq = r->br_cookie; - if (dtrq->dtrq_evtflags & DTEVT_KSTACK) - dokstack = 1; + if (dtrq->dtrq_evtflags & DTEVT_KSTACK) + dokstack = 1; } if (dokstack) -- 2.20.1