From c6446ab5e573a97c8f28142a1eb83820ce9c4fb2 Mon Sep 17 00:00:00 2001 From: espie Date: Mon, 6 Jun 2022 07:39:39 +0000 Subject: [PATCH] Yet another iteration of the tied algorithm: some packages contain several hundred copies of the same file (even thousands! I'm looking at you sdcc), so matching through a list is O(n^2). So hash the hashes on the actual file name so that we match directly if the same name file didn't change, and use the first entry otherwise, since the name won't match. speeds up updating of texlive somewhat, and makes a huge difference for sdcc. --- usr.sbin/pkg_add/OpenBSD/PkgAdd.pm | 60 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm b/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm index f92b9534c35..9909ed23caa 100644 --- a/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm +++ b/usr.sbin/pkg_add/OpenBSD/PkgAdd.pm @@ -1,7 +1,7 @@ #! /usr/bin/perl # ex:ts=8 sw=4: -# $OpenBSD: PkgAdd.pm,v 1.135 2022/05/30 09:30:40 espie Exp $ +# $OpenBSD: PkgAdd.pm,v 1.136 2022/06/06 07:39:39 espie Exp $ # # Copyright (c) 2003-2014 Marc Espie # @@ -84,7 +84,7 @@ sub hash_files my ($self, $state, $sha) = @_; return if $self->{link} or $self->{symlink} or $self->{nochecksum}; if (defined $self->{d}) { - push @{$sha->{$self->{d}->key}}, $self; + $sha->{$self->{d}->key}{$self->name} = $self; } } @@ -93,33 +93,49 @@ sub tie_files my ($self, $state, $sha) = @_; return if $self->{link} or $self->{symlink} or $self->{nochecksum}; # XXX python doesn't like this, overreliance on timestamps + return if $self->{name} =~ m/\.py$/ && !defined $self->{ts}; - if (exists $sha->{$self->{d}->key}) { - my ($tied, $realname); - for my $c (@{$sha->{$self->{d}->key}}) { - # don't tie if there's a problem with the file + + my $h = $sha->{$self->{d}->key}; + return if !defined $h; + + my ($tied, $realname); + my $c = $h->{$self->name}; + # first we try to match with the same name + if (defined $c) { + $realname = $c->realname($state); + # don't tie if the file doesn't exist + if (-f $realname && + # or was altered + (stat _)[7] == $self->{size}) { + $tied = $c; + } + } + # otherwise we grab any other match under similar rules + if (!defined $tied) { + for my $c ( values %{$h} ) { $realname = $c->realname($state); next unless -f $realname; - # and do a sanity check that this file wasn't altered next unless (stat _)[7] == $self->{size}; $tied = $c; - last if $tied->name eq $self->name; - } - return if !defined $tied; - if ($state->defines('checksum')) { - my $d = $self->compute_digest($realname, $self->{d}); - # XXX we don't have to display anything here - # because delete will take care of that - return unless $d->equals($self->{d}); + last; } - # so we found a match that find_extractible will use - $self->{tieto} = $tied; - # and we also need to tell size computation we won't be - # needing extra room for this. - $tied->{tied} = 1; - $state->say("Tying #1 to #2", $self->stringize, $realname) - if $state->verbose >= 3; } + return if !defined $tied; + + if ($state->defines('checksum')) { + my $d = $self->compute_digest($realname, $self->{d}); + # XXX we don't have to display anything here + # because delete will take care of that + return unless $d->equals($self->{d}); + } + # so we found a match that find_extractible will use + $self->{tieto} = $tied; + # and we also need to tell size computation we won't be needing + # extra diskspace for this. + $tied->{tied} = 1; + $state->say("Tying #1 to #2", $self->stringize, $realname) + if $state->verbose >= 3; } package OpenBSD::PkgAdd::State; -- 2.20.1