From: afresh1 Date: Sun, 3 Sep 2023 01:43:09 +0000 (+0000) Subject: Replace perl's use of syscall(2) with a dispatcher to libc X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=312e26c80be876012ae9792d4323a301329d66ae;p=openbsd Replace perl's use of syscall(2) with a dispatcher to libc This removes the ability to do direct syscalls from perl, instead calling the appropriate libc functions. Currently we generate the dispatcher via a perl script duing build. requested by deraadt@ nits from espie@ education from miod@ infrastructure fixes from sthen@ many improvements and ok gkoehler@ Please commit soon. OK bluhm@ --- diff --git a/gnu/usr.bin/perl/MANIFEST b/gnu/usr.bin/perl/MANIFEST index 2151630286b..65060e3eddf 100644 --- a/gnu/usr.bin/perl/MANIFEST +++ b/gnu/usr.bin/perl/MANIFEST @@ -6605,6 +6605,7 @@ t/op/svleak.pl Test file for svleak.t t/op/svleak.t See if stuff leaks SVs t/op/switch.t See if switches (given/when) work t/op/symbolcache.t See if undef/delete works on stashes with functions +t/op/syscall_emulator.t Tests that syscall works via the emulator t/op/sysio.t See if sysread and syswrite work t/op/taint.t See if tainting works t/op/threads.t Misc. tests for perl features with threads diff --git a/gnu/usr.bin/perl/Makefile.SH b/gnu/usr.bin/perl/Makefile.SH index c9f6308b1b5..6fc31c8fee6 100644 --- a/gnu/usr.bin/perl/Makefile.SH +++ b/gnu/usr.bin/perl/Makefile.SH @@ -541,7 +541,7 @@ c1 = av.c scope.c op.c doop.c doio.c dump.c gv.c hv.c mg.c reentr.c mro_core.c p c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c sv.c c3 = taint.c toke.c util.c deb.c run.c builtin.c universal.c pad.c globals.c keywords.c c4 = perlio.c numeric.c mathoms.c locale.c pp_pack.c pp_sort.c caretx.c dquote.c time64.c -c5 = $(mallocsrc) +c5 = $(mallocsrc) syscall_emulator.c !NO!SUBS! @@ -557,7 +557,7 @@ c = $(c1) $(c2) $(c3) $(c4) $(c5) miniperlmain.c $(mini_only_src) obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) pad$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) reentr$(OBJ_EXT) mro_core$(OBJ_EXT) keywords$(OBJ_EXT) builtin$(OBJ_EXT) obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) -obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) numeric$(OBJ_EXT) mathoms$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT) time64$(OBJ_EXT) +obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) numeric$(OBJ_EXT) mathoms$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT) time64$(OBJ_EXT) syscall_emulator$(OBJ_EXT) # split the objects into 3 exclusive sets: those used by both miniperl and # perl, and those used by just one or the other. Doesn't include the diff --git a/gnu/usr.bin/perl/Makefile.bsd-wrapper b/gnu/usr.bin/perl/Makefile.bsd-wrapper index 8f563014e34..4fdf6f15853 100644 --- a/gnu/usr.bin/perl/Makefile.bsd-wrapper +++ b/gnu/usr.bin/perl/Makefile.bsd-wrapper @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.bsd-wrapper,v 1.113 2023/02/15 01:38:20 afresh1 Exp $ +# $OpenBSD: Makefile.bsd-wrapper,v 1.114 2023/09/03 01:43:09 afresh1 Exp $ # # Build wrapper for Perl # @@ -39,11 +39,18 @@ cleandir: fi cd ${.CURDIR} && ${MAKE} -f Makefile.bsd-wrapper1 cleandir -all: config.sh +all: syscall_emulator.c config.sh cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 perl.build cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 mansrc.build install: cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 install + +syscall_emulator.c: gen_syscall_emulator.pl syscall_emulator.h /usr/include/sys/syscall.h /usr/include/sys/syscallargs.h + /usr/bin/perl $(.CURDIR)/gen_syscall_emulator.pl > $@ + +syscall_emulator.h: + ln -sf $(.CURDIR)/$@ $@ + .include diff --git a/gnu/usr.bin/perl/config.over b/gnu/usr.bin/perl/config.over index dfcc36cab13..cf13aa679e9 100644 --- a/gnu/usr.bin/perl/config.over +++ b/gnu/usr.bin/perl/config.over @@ -1,7 +1,7 @@ # # Override default paths when building in the OpenBSD src tree # -# $OpenBSD: config.over,v 1.22 2017/02/05 00:33:38 afresh1 Exp $ +# $OpenBSD: config.over,v 1.23 2023/09/03 01:43:09 afresh1 Exp $ # # We use a different architecture name than the default @@ -64,3 +64,9 @@ myuname='openbsd' # force to use ranlib ranlib='ranlib' + +# Enable the syscall emulator, +# enabling syscall even if we don't have it +d_syscall=define +d_syscallproto=define + diff --git a/gnu/usr.bin/perl/gen_syscall_emulator.pl b/gnu/usr.bin/perl/gen_syscall_emulator.pl new file mode 100755 index 00000000000..dd22ab01a81 --- /dev/null +++ b/gnu/usr.bin/perl/gen_syscall_emulator.pl @@ -0,0 +1,360 @@ +#!/usr/bin/perl +# $OpenBSD: gen_syscall_emulator.pl,v 1.1 2023/09/03 01:43:09 afresh1 Exp $ # +use v5.36; +use autodie; + +# Copyright (c) 2023 Andrew Hewus Fresh +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +my $includes = '/usr/include'; + +# Because perl uses a long for every syscall argument, +# if we are building a syscall_emulator for use by perl, +# taking that into account make things work more consistently +# across different OpenBSD architectures. +# Unfortunately there doesn't appear to be an easy way +# to make everything work "the way it was". +use constant PERL_LONG_ARGS => 1; + +# See also /usr/src/sys/kern/syscalls.master +my %syscalls = parse_syscalls( + "$includes/sys/syscall.h", + "$includes/sys/syscallargs.h", +)->%*; +delete $syscalls{MAXSYSCALL}; # not an actual function + +# The ordered list of all the headers we need +my @headers = qw< + sys/syscall.h + stdarg.h + errno.h + + sys/socket.h + sys/event.h + sys/futex.h + sys/ioctl.h + sys/ktrace.h + sys/mman.h + sys/mount.h + sys/msg.h + sys/poll.h + sys/ptrace.h + sys/resource.h + sys/select.h + sys/sem.h + sys/shm.h + sys/stat.h + sys/sysctl.h + sys/time.h + sys/uio.h + sys/wait.h + + dirent.h + fcntl.h + sched.h + signal.h + stdlib.h + stdio.h + syslog.h + tib.h + time.h + unistd.h +>; + +foreach my $header (@headers) { + my $filename = "$includes/$header"; + open my $fh, '<', $filename; + my $content = do { local $/; readline $fh }; + close $fh; + + foreach my $name (sort keys %syscalls) { + my $s = $syscalls{$name}; + my $func_sig = find_func_sig($content, $name, $s); + + if (ref $func_sig) { + die "Multiple defs for $name <$header> <$s->{header}>" + if $s->{header}; + $s->{func} = $func_sig; + $s->{header} = $header; + } elsif ($func_sig) { + $s->{mismatched_sig} = "$func_sig <$header>"; + } + } +} + +say "/*\n * Generated from gen_syscall_emulator.pl\n */"; +say "#include <$_>" for @headers; +print <<"EOL"; +#include "syscall_emulator.h" + +long +syscall_emulator(int syscall, ...) +{ + long ret = 0; + va_list args; + va_start(args, syscall); + + switch(syscall) { +EOL + +foreach my $name ( + sort { $syscalls{$a}{id} <=> $syscalls{$b}{id} } keys %syscalls + ) { + my %s = %{ $syscalls{$name} }; + + # Some syscalls we can't emulate, so we comment those out. + $s{skip} //= "Indirect syscalls not supported" + if !$s{argtypes} && ($s{args}[-1] || '') eq '...'; + $s{skip} //= "Mismatched func: $s{mismatched_sig}" + if $s{mismatched_sig} and not $s{func}; + $s{skip} //= "No signature found in headers" + unless $s{header}; + + my $ret = $s{ret} eq 'void' ? '' : 'ret = '; + $ret .= '(long)' if $s{ret} eq 'void *'; + + my (@args, @defines); + my $argname = ''; + if ($s{argtypes}) { + if (@{ $s{argtypes} } > 1) { + @defines = map { + my $t = $_->{type}; + my $n = $_->{name}; + $n = "_$n" if $n eq $name; # link :-/ + push @args, $n; + PERL_LONG_ARGS + ? "$t $n = ($t)va_arg(args, long);" + : "$t $n = va_arg(args, $t);" + } @{ $s{argtypes} }; + } else { + if (@{ $s{argtypes} }) { + $argname = " // " . join ', ', + map { $_->{name} } + @{ $s{argtypes} }; + } + @args = map { "va_arg(args, $_->{type})" } + @{ $s{argtypes} }; + } + } else { + @args = @{ $s{args} }; + + # If we didn't find args in syscallargs.h but have args + # we don't know how to write our function. + $s{skip} //= "Not found in sys/syscallargs.h" + if @args; + } + + #my $header = $s{header} ? " <$s{header}>" : ''; + + my $indent = "\t"; + say "$indent/* $s{skip}" if $s{skip}; + + $indent .= ' *' if $s{skip}; + say "${indent} $s{signature} " + if $s{skip} && $s{skip} =~ /Mismatch/; + + my $brace = @defines ? " {" : ""; + say "${indent}case $s{define}:$brace"; # // $s{id}"; + say "${indent}\t$_" for @defines; + #say "${indent}\t// $s{signature}$header"; + say "${indent}\t$ret$name(" . join(', ', @args) . ");$argname"; + say "${indent}\tbreak;"; + say "${indent}}" if $brace; + + say "\t */" if $s{skip}; +} + +print <<"EOL"; + default: + ret = -1; + errno = ENOSYS; + } + va_end(args); + + return ret; +} +EOL + + +sub parse_syscalls($syscall, $args) +{ + my %s = parse_syscall_h($syscall)->%*; + + my %a = parse_syscallargs_h($args)->%*; + $s{$_}{argtypes} = $a{$_} for grep { $a{$_} } keys %s; + + return \%s; +} + +sub parse_syscall_h($filename) +{ + my %s; + open my $fh, '<', $filename; + while (readline $fh) { + if (m{^/\* + \s+ syscall: \s+ "(?[^"]+)" + \s+ ret: \s+ "(? [^"]+)" + \s+ args: \s+ (?.*?) + \s* \*/ + | + ^\#define \s+ (?SYS_(?\S+)) \s+ (?\d+) + }x) + { + my $name = $+{name}; + $s{$name}{$_} = $+{$_} for keys %+; + $s{$name}{args} = [ $+{args} =~ /"(.*?)"/g ] + if exists $+{args}; + } + } + close $fh; + + foreach my $name (keys %s) { + my %d = %{ $s{$name} }; + next unless $d{ret}; # the MAXSYSCALL + + my $ret = $d{ret}; + my @args = @{ $d{args} || [] }; + @args = 'void' unless @args; + + if ($args[-1] ne '...') { + my @a; + for (@args) { + push @a, $_; + last if $_ eq '...'; + } + @args = @a; + } + + my $args = join ", ", @args; + $s{$name}{signature} = "$ret\t$name($args);" =~ s/\s+/ /gr; + #print " $s{$name}{signature}\n"; + } + + return \%s; +} + +sub parse_syscallargs_h($filename) +{ + my %args; + + open my $fh, '<', $filename; + while (readline $fh) { + if (my ($syscall) = /^struct \s+ sys_(\w+)_args \s+ \{/x) { + $args{$syscall} = []; + while (readline $fh) { + last if /^\s*\};\s*$/; + if (/syscallarg + \( (? [^)]+ ) \) + \s+ (? \w+ ) \s* ; + /x) { + push @{$args{$syscall}}, {%+}; + } + } + } + } + close $fh; + + return \%args; +} + +sub find_func_sig($content, $name, $s) +{ + my $re = $s->{re} //= qr{^ + (? \S+ (?: [^\S\n]+ \S+)? ) [^\S\n]* \n? + \b \Q$name\E \( (? [^)]* ) \) + [^;]*; + }xms; + + $content =~ /$re/ || return !!0; + my $ret = $+{ret}; + my $args = $+{args}; + + for ($ret, $args) { + s/^\s+//; + s/\s+$//; + s/\s+/ /g; + } + + # The actual functions may have this extra annotation + $args =~ s/\*\s*__restrict/*/g; + + my %func_sig = ( ret => $ret, args => [ split /\s*,\s*/, $args ] ); + + return "$ret $name($args);" =~ s/\s+/ /gr + unless sigs_match($s, \%func_sig); + + return \%func_sig; +} + +# Tests whether two types are equivalent. +# Sometimes there are two ways to represent the same thing +# and it seems the functions and the syscalls +# differ a fair amount. +sub types_match($l, $r) +{ + state %m = ( + caddr_t => 'char *', + idtype_t => 'int', + nfds_t => 'u_int', + __off_t => 'off_t', + pid_t => 'int', + __size_t => 'u_long', + size_t => 'u_long', + 'unsigned int' => 'u_int', + 'unsigned long' => 'u_long', + ); + + $l //= '__undef__'; + $r //= '__undef__'; + + s/\b volatile \s+//x for $l, $r; + s/\b const \s+//x for $l, $r; + s/\s* \[\d*\] $/ \*/x for $l, $r; + + my ($f, $s) = sort { length($a) <=> length($b) } $l, $r; + if (index($s, $f) == 0) { + $s =~ s/^\Q$f\E\s*//; + if ( $s && $s =~ /^\w+$/ ) { + #warn "prefix ['$f', '$s']\n"; + s/\s*\Q$s\E$// for $l, $r; + } + } + + $l = $m{$l} //= $l; + $r = $m{$r} //= $r; + + return $l eq $r; +} + + +# Tests whether two function signatures match, +# expected to be left from syscall.h, right from the appopriate header. +sub sigs_match($l, $r) +{ + return !!0 unless types_match( $l->{ret}, $l->{ret} ); + + my @l_args = @{ $l->{args} || [] }; + my @r_args = @{ $r->{args} || [] }; + + for (\@l_args, \@r_args) { + @{$_} = 'void' unless @{$_}; + } + + for my $i ( 0 .. $#l_args ) { + return !!0 unless types_match($l_args[$i], $r_args[$i]); + last if $l_args[$i] eq '...'; + } + + return !!1; +} diff --git a/gnu/usr.bin/perl/pp_sys.c b/gnu/usr.bin/perl/pp_sys.c index 4cbe323ec51..6cf31244dda 100644 --- a/gnu/usr.bin/perl/pp_sys.c +++ b/gnu/usr.bin/perl/pp_sys.c @@ -30,6 +30,8 @@ #define PERL_IN_PP_SYS_C #include "perl.h" #include "time64.h" +#include "syscall_emulator.h" +#define syscall syscall_emulator #ifdef I_SHADOW /* Shadow password support for solaris - pdo@cs.umd.edu diff --git a/gnu/usr.bin/perl/syscall_emulator.h b/gnu/usr.bin/perl/syscall_emulator.h new file mode 100644 index 00000000000..e0a3bbeb988 --- /dev/null +++ b/gnu/usr.bin/perl/syscall_emulator.h @@ -0,0 +1 @@ +long syscall_emulator(int, ...); diff --git a/gnu/usr.bin/perl/t/op/syscall_emulator.t b/gnu/usr.bin/perl/t/op/syscall_emulator.t new file mode 100644 index 00000000000..34745f00895 --- /dev/null +++ b/gnu/usr.bin/perl/t/op/syscall_emulator.t @@ -0,0 +1,148 @@ +#!/usr/bin/perl +# $OpenBSD: syscall_emulator.t,v 1.1 2023/09/03 01:43:09 afresh1 Exp $ # + +# Copyright (c) 2023 Andrew Hewus Fresh +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +BEGIN { + chdir 't' if -d 't'; + require "./test.pl"; + set_up_inc( qw(. ../lib lib ../dist/base/lib) ); +} + +use v5.36; + +use File::Temp; +use POSIX qw< S_IRUSR S_IWUSR S_IRGRP S_IROTH O_CREAT O_WRONLY O_RDONLY >; + +use constant { + PROT_READ => 0x01, + MAP_PRIVATE => 0x0002, + MAP_FAILED => -1, +}; + +my $dir = File::Temp->newdir("syscall_emulator-XXXXXXXXX"); +{ + local $ENV{PERL5LIB} = join ':', @INC; + open(my $fh, '-|', $^X, "../utils/h2ph", '-d', $dir, + "/usr/include/sys/syscall.h") or die "h2ph: $!"; + note <$fh>; + close($fh) or die $! ? "h2ph: $!" : "h2ph: $?"; + local @INC = ("$dir/usr/include", "$dir"); + require 'sys/syscall.ph'; +} + +my $filename = "test.txt"; +my $file = "$dir/$filename"; +my $fd; +my $out = "Hello World\n"; +my $in = "\0" x 32; +my ($in_p, $in_v); +my $sb = "\0" x 4096; +my $st_mode; + +my $perms = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; + +plan tests => 17; + +ok(! + (($fd = syscall(SYS_open(), $file, O_CREAT|O_WRONLY, $perms)) < 0), + "Opened $filename for write/create" +); +ok(! + (syscall(SYS_write(), $fd, $out, length $out) <= 0), + "Wrote out to $filename" +); +ok(! + (syscall(SYS_close(), $fd) != 0), + "closed $filename" +); + + +ok(! + (syscall(SYS_stat(), $file, $sb) != 0), + "stat $filename" +); + +# fortunately st_mode is the first unsigned long in stat struct +$st_mode = unpack "L", $sb; + +ok( ($st_mode & 0777) == ($perms & 0777), + sprintf "new file %s has correct permissions (%o)", + $filename, $st_mode & 0777 +); + +ok(! + (($fd = syscall(SYS_open(), $file, O_RDONLY)) < 0), + "Opened $filename for read" +); +ok(! + (syscall(SYS_read(), $fd, $in, length $in) <= 0), + "read from $filename" +); + +$in = unpack 'Z*', $in; + +ok( length($in) == length($out) && ($in eq $out), + "Read written content from $filename" +); + +ok(! + (syscall(SYS_lseek(), $fd, 0, SEEK_SET) < 0), + "lseek on fd" +); + +ok(! + (syscall(SYS_pread(), $fd, $in = "\0" x 32, 5, 3) < 0), + "pread on fd" +); + +$in = unpack 'Z*', $in; + +ok( length($in) == 5 && ($in eq substr $out, 3, 5), + "Read written content from $filename ($in)" +); + +ok(! + (syscall(SYS_lseek(), $fd, 0, SEEK_SET) < 0), + "lseek on fd" +); + +ok(! + (syscall(SYS_lseek(), $fd, 0, SEEK_SET) < 0), + "lseek on fd" +); + +ok(! + (($in_p = syscall(SYS_mmap(), undef, length($out), PROT_READ, MAP_PRIVATE, + $fd, 0)) == MAP_FAILED), + "mmap fd" +); + +# From ingy's Pointer module +$in_v = unpack "p*", pack "L!", $in_p; + +ok( length($in_v) == length($out) && ($in_v eq $out), + "Read written content from $filename" +); + +ok(! + (syscall(SYS_munmap(), $in_p, length($out)) != 0), + "munmap fd" +); + +ok(! + (syscall(SYS_close(), $fd) != 0), + "closed $filename" +);