Yet another iteration of the tied algorithm: some packages contain
authorespie <espie@openbsd.org>
Mon, 6 Jun 2022 07:39:39 +0000 (07:39 +0000)
committerespie <espie@openbsd.org>
Mon, 6 Jun 2022 07:39:39 +0000 (07:39 +0000)
several hundred copies of the same file (even thousands! I'm looking at you
sdcc), so matching through a list is O(n^2).

So hash the entries for each checksum on the actual file name, so that we
match directly if the file with the same name didn't change, and use the
first usable entry otherwise, since the name won't match.

Speeds up updating of texlive somewhat, and makes a huge difference for sdcc.

usr.sbin/pkg_add/OpenBSD/PkgAdd.pm

index f92b953..9909ed2 100644 (file)
@@ -1,7 +1,7 @@
 #! /usr/bin/perl
 
 # ex:ts=8 sw=4:
-# $OpenBSD: PkgAdd.pm,v 1.135 2022/05/30 09:30:40 espie Exp $
+# $OpenBSD: PkgAdd.pm,v 1.136 2022/06/06 07:39:39 espie Exp $
 #
 # Copyright (c) 2003-2014 Marc Espie <espie@openbsd.org>
 #
@@ -84,7 +84,7 @@ sub hash_files
        my ($self, $state, $sha) = @_;
        return if $self->{link} or $self->{symlink} or $self->{nochecksum};
        if (defined $self->{d}) {
-               push @{$sha->{$self->{d}->key}}, $self;
+               $sha->{$self->{d}->key}{$self->name} = $self;
        }
 }
 
@@ -93,33 +93,49 @@ sub tie_files
        my ($self, $state, $sha) = @_;
        return if $self->{link} or $self->{symlink} or $self->{nochecksum};
        # XXX python doesn't like this, overreliance on timestamps
+
        return if $self->{name} =~ m/\.py$/ && !defined $self->{ts};
-       if (exists $sha->{$self->{d}->key}) {
-               my ($tied, $realname);
-               for my $c (@{$sha->{$self->{d}->key}}) {
-                       # don't tie if there's a problem with the file
+
+       my $h = $sha->{$self->{d}->key};
+       return if !defined $h;
+
+       my ($tied, $realname);
+       my $c = $h->{$self->name};
+       # first we try to match with the same name
+       if (defined $c) {
+               $realname = $c->realname($state);
+               # don't tie if the file doesn't exist
+               if (-f $realname && 
+               # or was altered
+                   (stat _)[7] == $self->{size}) {
+                       $tied = $c;
+               }
+       }
+       # otherwise we grab any other match under similar rules
+       if (!defined $tied) {
+               for my $c ( values %{$h} ) {
                        $realname = $c->realname($state);
                        next unless -f $realname;
-                       # and do a sanity check that this file wasn't altered
                        next unless (stat _)[7] == $self->{size};
                        $tied = $c;
-                       last if $tied->name eq $self->name;
-               }
-               return if !defined $tied;
-               if ($state->defines('checksum')) {
-                       my $d = $self->compute_digest($realname, $self->{d});
-                       # XXX we don't have to display anything here
-                       # because delete will take care of that
-                       return unless $d->equals($self->{d});
+                       last;
                }
-               # so we found a match that find_extractible will use
-               $self->{tieto} = $tied;
-               # and we also need to tell size computation we won't be
-               # needing extra room for this.
-               $tied->{tied} = 1;
-               $state->say("Tying #1 to #2", $self->stringize, $realname) 
-                   if $state->verbose >= 3;
        }
+       return if !defined $tied;
+
+       if ($state->defines('checksum')) {
+               my $d = $self->compute_digest($realname, $self->{d});
+               # XXX we don't have to display anything here
+               # because delete will take care of that
+               return unless $d->equals($self->{d});
+       }
+       # so we found a match that find_extractible will use
+       $self->{tieto} = $tied;
+       # and we also need to tell size computation we won't be needing 
+       # extra diskspace for this.
+       $tied->{tied} = 1;
+       $state->say("Tying #1 to #2", $self->stringize, $realname) 
+           if $state->verbose >= 3;
 }
 
 package OpenBSD::PkgAdd::State;