diff options
author | Tom Ryder <tom@sanctum.geek.nz> | 2017-07-29 17:38:12 +1200 |
---|---|---|
committer | Tom Ryder <tom@sanctum.geek.nz> | 2017-07-29 17:48:59 +1200 |
commit | 27d5f435fad45378e289461040dd7ab2588d6a46 (patch) | |
tree | 07e37471227f002d25d227206c2dbaab81eec0d1 | |
parent | Can just use stat now (diff) | |
download | checkem-2.1.tar.gz (sig) checkem-2.1.zip |
Refactor, version bump (v2.1)
Do initial size filtering and collection in the wanted sub, and delay
inode checking until filtering each size
-rwxr-xr-x | checkem | 63 |
1 file changed, 29 insertions, 34 deletions
```diff
@@ -19,15 +19,15 @@ use Digest::SHA;
 use 5.009003;
 
 # Version number to make Perl::Critic happy
-our $VERSION = 2.0;
+our $VERSION = 2.1;
 
 # If no arguments, work with the current working directory
 croak('Need at least one dir to search') if !@ARGV;
 
-# Start an array of filenames...
-my @fns;
+# Start a hash of filesizes to file names/stats...
+my %sizes;
 
-# ... and fill it up with File::Find.
+# ...and fill it up with File::Find.
 find {
     no_chdir => 1,
     wanted => sub {
@@ -35,49 +35,44 @@ find {
         ## no critic (ProhibitFiletest_f)
         return if !-f $fn;
         return if -l $fn;
-        push @fns, $fn;
+        my $st = stat $fn or return;
+        return if !$st->size;
+        push @{ $sizes{ $st->size } },
+          {
+            fn => $fn,
+            st => $st,
+          };
         return;
     },
 }, @ARGV;
 
-# Start a table of device and inode to detect hard links, to completely skip
-# any files we've already seen
-my %inodes;
-
-# Build a table of filesizes to list of filenames with that size
-my %sizes;
-FN: for my $fn (@fns) {
-
-    # Get file metadata
-    my $st = stat $fn or next FN;
+# If there's more than one filename of any of the sizes, look for hard links,
+# checksum them if not linked, and push them into a sums table
+my ( %sums, $dig );
+FS: for my $fs ( grep { @{$_} > 1 } values %sizes ) {
 
-    # Skip the file if it's empty
-    next FN if !$st->size;
+    # Keep a temporary table of inodes to catch hard links
+    my %inos;
 
-    # Try to get a device ID and inode, so we can ignore hard-linked files
-    # using the inodes lookup table if we've seen that tuple before
-    if ( $st->dev && $st->ino ) {
-        next FN if exists $inodes{ $st->dev }{ $st->ino };
-        $inodes{ $st->dev }{ $st->ino } = 1;
-    }
+    # Iterate through each file in the list
+    F: for my $f ( @{$fs} ) {
 
-    # Add the filename to its size bucket
-    push @{ $sizes{ $st->size } }, $fn;
-}
+        # Catch hard links on compliant systems by keeping a dev/inode hash
+        my ( $dev, $ino ) = ( $f->{st}->dev, $f->{st}->ino );
+        if ( $dev && $ino ) {
+            next F if exists $inos{$dev}{$ino};
+            $inos{$dev}{$ino} = $f;
+        }
 
-# If there's more than one filename of any of the sizes, checksum them and push
-# them into a sums table
-my ( %sums, $dig );
-SIZE: for my $fns ( grep { @{$_} > 1 } values %sizes ) {
-    for my $fn ( @{$fns} ) {
+        # Files still the same size and not hard linked, group by digest
         $dig //= Digest::SHA->new('sha256');
-        $dig->addfile($fn);
-        push @{ $sums{ $dig->digest() } }, $fn;
+        $dig->addfile( $f->{fn} );
+        push @{ $sums{ $dig->digest() } }, $f;
     }
 }
 
 # Print the groups of matched files (more than one share a checksum in the
 # final table)
 for my $group ( grep { @{$_} > 1 } values %sums ) {
-    printf "%s\n\n", join "\n", @{$group};
+    printf "%s\n\n", join "\n", map { $_->{fn} } @{$group};
 }
```