fdupe 查找重复文件的Perl脚本代码(perl脚本运行结果只有表头)墙裂推荐

随心笔谈1年前 (2023)发布 admin
125 0

#!/usr/bin/perl

#

# fdupe tool – finding duplicate files

#

# $Id: fdupe,v 1.7 2011/10/14 20:11:21 root Exp root $

#

# Source code Copyright (c) 1998,2011 Bernhard Schneider.

# May be used only for non-commercial purposes with

# appropriate acknowledgement of copyright.

#

# FILE :        fdupe

# DESCRIPTION : script finds duplicate Files.

# AUTHOR:       Bernhard Schneider

# hints, corrections & ideas are welcome

#

# usage: fdupe.pl [directory ...]    (defaults to the current directory)

#        find / -xdev | fdupe.pl

#

# how to select and remove duplicates:

#   redirect output to >file, edit the file and mark lines you

#   wish to move/delete with a preceding dash (-)

#   Use following script to delete marked files:

#   #!/usr/bin/perl -n

#   chomp; unlink if s/^-//;

#

# history:

# 12.05.99 – goto statement replaced with next

# 14.05.99 – minor changes

# 18.05.99 – removed confusing ‘for $y’

#            included hash-search

# 20.05.99 – minor changes

# 02.03.00 – some functions rewritten, optimized for speed

# 10.01.01 – hint-fix by Ozzie |ozric at kyuzz.org|

# 05.03.02 – fixed hangups by reading block/char-Devices

# 08.09.11 – skips checking of hard links

# 14.10.11 – accept file names from stdin

#

#use strict; # uncomment for debugging

# Flush STDOUT after every write so progress messages show up immediately.
$| = 1;

# F1 and F2 are the two bareword file handles shared by getchunk (opens F1)
# and mycmp (opens F2).
local (*F1, *F2);

# Candidate files grouped by size: size => [ file name, ... ].
my %farray = ();

# [ link count, inode ] of the file currently open on F1; set by getchunk.
my $statF1;

# ——————————

# traverse directories

# Recursively walk $dir and record every file found in %farray, grouped by
# size (size => [ path, ... ]).
#
# Symbolic links, sockets, named pipes and character/block devices are
# skipped; because symlinks are skipped, linked directories are not followed.
# Empty files are stored under the numeric key 0.
#
# Dies if $dir (or any directory below it) cannot be opened.
sub scan ($) {
    my ($dir) = @_;

    # A lexical handle gives each recursion level its own directory handle;
    # a shared bareword handle would be closed by the first nested call.
    opendir(my $dh, $dir) or die "($dir) $!:$@";
    my @paths = map { "$dir/$_" } grep { !/^\.\.?$/ } readdir($dh);
    closedir($dh);

    for my $path (@paths) {
        next if -l $path or -S $path or -p $path or -c $path or -b $path;

        if (-d $path) {
            scan($path);
        }
        else {
            # -s yields '' for an empty file; normalise that to 0 so the
            # size key is always numeric.
            push @{ $farray{ (-s $path) || 0 } }, $path;
        }
    }

    return;
}

# ——————————

# get chunk of bytes from a file

# Open a file on the shared handle F1 and return its leading bytes, which
# the caller uses as a cheap pre-filter key before a full comparison.
#
#   $fsize  - size in bytes the file is expected to have
#   $pfname - reference to a scalar holding the file name
#
# Side effects: F1 is left open in binary mode on the file, and $statF1 is
# set to [ link count, inode ] of that file.
#
# Returns the first 32 bytes, or the whole content when the file is shorter
# and exactly $fsize bytes were read. Returns undef when the file cannot be
# opened, the read fails, or fewer bytes than expected were available.
sub getchunk ($$) {
  my ($fsize,$pfname)=@_;
  my $chunksize=32;
  my ($nread,$buff);

  # Three-argument open: the name is taken literally, so names that start
  # with '>' or end with '|' cannot truncate files or spawn commands.
  return undef unless open(F1,'<',$$pfname);

  $statF1=[(stat F1)[3,1]];
  binmode F1;

  $nread=read(F1,$buff,$chunksize);
  return undef unless defined $nread;      # read error

  return ($nread==$chunksize || $nread==$fsize) ? "$buff" : undef;
}

# ——————————

# compare two files

# Compare the file already open on the shared handle F1 with the file whose
# name is referenced by $fptr.
#
#   $fptr - reference to a scalar holding the second file's name
#
# The second file is opened on the shared handle F2, which is closed again
# before returning. F1 is rewound to its start before the comparison and is
# left open.
#
# Returns 0 when the files are considered identical, -1 when they differ or
# when either file cannot be opened or read.
sub mycmp ($) {
  my ($fptr)=@_;
  my ($buffa,$buffb);
  my ($nread1,$nread2);
  my $buffsize=16*1024;

  # Three-argument open so the file name is never parsed for mode characters.
  return -1 unless open(F2,'<',$$fptr);

  # Hard links to the same file need no content comparison. The device must
  # match as well as the inode: inode numbers are only unique per filesystem.
  my @stat1=stat F1;
  my @stat2=stat F2;
  if (@stat1 && @stat2
      && $stat2[3] > 1              # link count
      && $stat1[0]==$stat2[0]       # device
      && $stat1[1]==$stat2[1]) {    # inode
    close F2;
    return 0;
  }

  binmode F2;
  seek(F1,0,0);

  while (1) {
    $nread1=read(F1,$buffa,$buffsize);
    $nread2=read(F2,$buffb,$buffsize);

    # A failed read (undef) must never be reported as "identical".
    if (!defined $nread1 || !defined $nread2
        || $nread1 != $nread2 || $buffa ne $buffb) {
      close F2;
      return -1;
    }

    last unless $nread1;            # both files hit EOF together
  }

  close F2;
  return 0;
}

# ——————————

# Phase 1: fill %farray (size => [ file name, ... ]).
print "collecting files and sizes ...\n";

if (-t STDIN) {

    # Interactive use: scan the directories given on the command line,
    # or the working directory when none were given.
    @ARGV = ('.') unless @ARGV;

    scan($_) for @ARGV;

}
else {

    # Piped use (e.g. `find / -xdev | fdupe.pl`): one file name per line.
    while (my $path = <STDIN>) {

        $path =~ s/[\r\n]+\z//;         # strip LF or CRLF line ending
        next if $path eq '';

        # find(1) also lists directories, and names may have vanished by
        # now; only existing regular files that are not symlinks qualify.
        next if -l $path;
        next unless -f $path;

        # -s yields '' for an empty file; normalise that to 0.
        push @{ $farray{ (-s $path) || 0 } }, $path;
    }

}

# Phase 2: within each group of equally sized files, find identical ones.
print "now comparing ...\n";

for my $fsize (reverse sort { $a <=> $b } keys %farray) {

    my $files = $farray{$fsize};

    # A size that occurs only once cannot have duplicates.
    next if @$files < 2;

    my $pnum  = 0;      # number of distinct contents seen so far
    my %dupes = ();     # group number  => [ refs to names with equal content ]
    my %index = ();     # leading chunk => [ group numbers starting with it ]

  FILE:
    for my $nx (0 .. $#$files) {        # $nx is a 0-based index into the group

        my $fptr  = \$files->[$nx];
        my $chunk = getchunk($fsize, $fptr);    # also opens the file on F1

        # A file that could not be opened or read cannot be compared.
        next FILE unless defined $chunk;

        # Only groups whose first file starts with the same chunk can match.
        for my $i (@{ $index{$chunk} || [] }) {

            my $fref = $dupes{$i}[0];

            if (mycmp($fref) == 0) {
                # found duplicate, collecting
                push @{ $dupes{$i} }, $fptr;
                next FILE;
            }
        }

        # nothing found: this file starts a new group
        push @{ $dupes{$pnum} }, $fptr;
        push @{ $index{$chunk} }, $pnum++;
    }

    # Show the duplicates found for this size, in order of discovery.
    for my $i (sort { $a <=> $b } keys %dupes) {

        next if @{ $dupes{$i} } < 2;

        print "\n size: $fsize\n\n";

        for my $name_ref (@{ $dupes{$i} }) {
            print $$name_ref, "\n";
        }
    }
}

close F1;

close F2;

© 版权声明

相关文章