123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- #!/usr/bin/perl
- # Daniel "Trizen" Șuteu
- # License: GPLv3
- # Date: 22 June 2012
- # https://github.com/trizen
- # Find files which have exactly or *ALMOST*
- # exactly the same name in a given path.
- # Improved version here:
- # https://github.com/trizen/perl-scripts/blob/master/Finders/fsfn.pl
- use 5.014;
- use strict;
- use warnings;
- use File::Find qw(find);
- use Getopt::Std qw(getopts);
- use Text::Unidecode qw(unidecode);
- my @dirs = grep { -d } @ARGV;
- die <<"HELP" if !@dirs;
- usage: $0 [options] /my/path [...]
- Options:
- -f : keep only the first file
- -l : keep only the last file
- HELP
- my %opts;
- if (@ARGV) {
- getopts("fl", \%opts);
- }
- sub compare_strings ($$) {
- my ($name1, $name2) = @_;
- return 0 if $name1 eq $name2;
- if (length($name1) > length($name2)) {
- ($name2, $name1) = ($name1, $name2);
- }
- my $len1 = length($name1);
- my $len2 = length($name2);
- my $min = int(0.5 + $len2 / 2);
- return -1 if $min > $len1;
- my $diff = $len1 - $min;
- foreach my $i (0 .. $diff) {
- foreach my $j ($i .. $diff) {
- if (index($name2, substr($name1, $i, $min + $j - $i)) != -1) {
- return 0;
- }
- }
- }
- return 1;
- }
- sub find_duplicated_files (&@) {
- my $code = shift;
- my %files;
- find {
- wanted => sub {
- lstat;
- -f _ && (not -l _) && push @{$files{"key"}}, # to group files by size, change the "key" to '-s _' (unquoted)
- {
- name => do { utf8::decode($_); lc(unidecode($_) =~ s{\.\w+\z}{}r) },
- real_name => $File::Find::name,
- };
- }
- } => @_;
- foreach my $files (values %files) {
- next if $#{$files} < 1;
- my %dups;
- foreach my $i (0 .. $#{$files} - 1) {
- for (my $j = $i + 1 ; $j <= $#{$files} ; $j++) {
- if (compare_strings($files->[$i]{name}, $files->[$j]{name}) == 0) {
- push @{$dups{$files->[$i]{real_name}}}, ${splice @{$files}, $j--, 1}{real_name};
- }
- }
- }
- while (my ($fparent, $fdups) = each %dups) {
- $code->(sort $fparent, @{$fdups});
- }
- }
- return 1;
- }
- {
- local $, = "\n";
- find_duplicated_files {
- say @_, "-" x 80 if @_;
- foreach my $i (
- $opts{f} ? (1 .. $#_)
- : $opts{l} ? (0 .. $#_ - 1)
- : ()
- ) {
- unlink $_[$i] or warn "[error]: Can't delete: $!\n";
- }
- }
- @dirs;
- }
|