From: espie Date: Fri, 31 Mar 2000 15:55:06 +0000 (+0000) Subject: Parse nroff source directly as well, don't use getNAME which is awful X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=d4e252f7ccae06a56aa514474561cc53d2923d60;p=openbsd Parse nroff source directly as well, don't use getNAME which is awful at doing this. --- diff --git a/libexec/makewhatis/makewhatis.pl b/libexec/makewhatis/makewhatis.pl index 5b39c3d48a0..1633b762173 100644 --- a/libexec/makewhatis/makewhatis.pl +++ b/libexec/makewhatis/makewhatis.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl -w # ex:ts=4 sw=4: -# $OpenBSD: makewhatis.pl,v 1.2 2000/02/05 22:15:16 espie Exp $ +# $OpenBSD: makewhatis.pl,v 1.3 2000/03/31 15:55:06 espie Exp $ # # Copyright (c) 2000 Marc Espie. # @@ -56,26 +56,133 @@ sub write_uniques chown 0, (getgrnam 'bin')[2], $f; } -# handle_unformated($result, $args) -# -# handle a batch of unformated manpages $args, -# push the subjects to $result -# +sub add_fsubject +{ + my $lines = shift; + my $toadd = shift; + my $section = shift; + local $_ = join(' ', @$toadd); + # unbreakable spaces + s/\\\s+/ /g; + # em dashes + s/\\\(em\s+/- /g; + # font changes + s/\\f[BIRP]//g; + s/\\-/($section) -/ || s/\s-\s/ ($section) - /; + # other dashes + s/\\-/-/g; + # sequence of spaces + s/\s+$//; + s/\s+/ /g; + # escaped characters + s/\\\&(.)/$1/g; + # gremlins... + s/\\c//g; + push(@$lines, $_); +} + sub handle_unformated { - my $result = shift; - my $args = shift; + my $f = shift; + my $filename = shift; + my @lines = (); + my $so_found = 0; local $_; - my $cmd; - - $cmd = new IO::File "/usr/libexec/getNAME ".join(" ", @$args)."|"; - while (<$cmd>) { - chomp; - s/ [a-zA-Z\d]* \\-/ -/; - push(@$result, $_); + # retrieve basename of file + my ($name, $section) = $filename =~ m|(?:.*/)?(.*)\.([\w\d]+)|; + # scan until macro + while (<$f>) { + next unless m/^\./; + if (m/^\.de/) { + while (<$f>) { + last if m/^\.\./; + } + next; + } + $so_found = 1 if m/\.so/; + if (m/^\.TH/ || m/^\.th/) { + # ($name2, $section2) = m/^\.(?:TH|th)\s+(\S+)\s+(\S+)/; + while (<$f>) { + next unless m/^\./; + if (m/^\.SH/ || m/^\.sh/) { + my @subject = (); + while (<$f>) { + last if m/^\.SH/ || m/^\.sh/ || m/^\.SS/ || + m/^\.ss/ || m/^\.nf/; + if (m/^\.PP/ || m/^\.br/ || m/^\.PD/ || /^\.sp/) { + add_fsubject(\@lines, \@subject, $section) + if @subject != 0; + @subject = (); + next; + } + next if m/^\'/ || m/\.tr\s+/ || m/\.\\\"/; + if (m/^\.de/) { + while (<$f>) { + last if m/^\.\./; + } + next; + } + chomp; + s/\.(?:B|I|IR|SM)\s+//; + push(@subject, $_) unless m/^\s*$/; + } + add_fsubject(\@lines, \@subject, $section) + if @subject != 0; + return \@lines; + } + } + warn "Couldn't find subject in old manpage $filename\n"; + } elsif (m/^\.Dt/) { + $section .= "/$1" if (m/^\.Dt\s+\S+\s+\d\S*\s+(\S+)/); + while (<$f>) { + next unless m/^\./; + if (m/^\.Sh/) { + # subject/keep is the only way to deal with Nm/Nd pairs + my @subject = (); + my @keep = (); + my $nd_seen = 0; + while (<$f>) { + last if m/^\.Sh/; + s/\s,/,/g; + if (s/^\.(..)\s+//) { + my $macro = $1; + next if $macro eq "\\\""; + s/\"(.*?)\"/$1/g; + s/\\-/-/g; + $macro eq 'Xr' and s/^(\S+)\s+(\d\S*)/$1 ($2)/; + $macro eq 'Ox' and s/^/OpenBSD /; + $macro eq 'Nx' and s/^/NetBSD /; + if ($macro eq 'Nd') { + if (@keep != 0) { + add_fsubject(\@lines, \@keep, $section); + @keep = (); + } + push(@subject, "\\-"); + $nd_seen = 1; + } + if ($nd_seen && $macro eq 'Nm') { + @keep = @subject; + @subject = (); + $nd_seen = 0; + } + } + push(@subject, $_) unless m/^\s*$/; + } + unshift(@subject, @keep) if @keep != 0; + add_fsubject(\@lines, \@subject, $section) + if @subject != 0; + return \@lines; + } + } + } + } + if ($so_found == 0) { + warn "Unknown manpage type $filename\n"; } - close $cmd; + return \@lines; } + + sub add_subject { @@ -121,9 +228,13 @@ sub handle_formated {} if (m/\w[-+.\w\d]*\(([-+.\w\d\/]+)\)/) { $section = $1; + # Find architecture + if (m/Manual\s+\((.*?)\)/) { + $section = "$section/$1"; + } } # Not all man pages are in english - if (m/^(?:NAME|NAMN)\s*$/) { + if (m/^(?:NAME|NAMN|Name)\s*$/) { unless (defined $section) { print STDERR "Can't find section in $filename\n"; $section='??'; @@ -162,7 +273,7 @@ sub find_manpages $list=[]; find( sub { - return unless /(?:\.[0-9]|0\.Z|0\.gz)$/; + return unless /\.\d\w*(?:\.Z|\.gz)?$/; return unless -f $_; my $inode = (stat _)[1]; return if defined $nodes{$inode}; @@ -180,30 +291,27 @@ sub scan_manpages { my $list = shift; local $_; - my (@todo, $done); + my ($done); $done=[]; for (@$list) { my ($file, $subjects); - if (m/\.[1-9]$/) { - push(@todo, $_); - if (@todo > 5000) { - handle_unformated($done, \@todo); - @todo = (); - } - next; - } elsif (m/\.0\.(?:Z|gz)$/) { + if (m/\.(?:Z|gz)$/) { $file = new IO::File "gzip -fdc $_|"; - } else { + $_ = $`; + } else { $file = new IO::File $_ or die "$0: Can't read $_\n"; + } + if (m/\.[1-9][^.]*$/) { + $subjects = handle_unformated($file, $_); + } elsif (m/\.0$/) { + $subjects = handle_formated($file, $_); + } else { + warn "Can't find type of $_"; + next; } - - $subjects = handle_formated($file, $_); push @$done, @$subjects; } - if (@todo > 0) { - handle_unformated($done, \@todo); - } return $done; } @@ -261,6 +369,12 @@ if ($#ARGV == -1) { } for my $mandir (@ARGV) { + if (-f $mandir) { + my @l = ($mandir); + my $s = scan_manpages(\@l); + print join("\n", @$s), "\n"; + exit 0; + } unless (-d $mandir) { die "$0: $mandir: not a directory" }