#!/usr/bin/perl #last version: please check http://www.makarevitch.org/cpShuffleSature/ # #authors: #Nat #contribution by Jim Crumley (csbsju.edu): opt_nosubdir support # #public domain #version: 20040704_2 # some ideas borrowed from Adam Kessel's 'randomplay' code: # http://bostoncoop.net/adam/randomplay #to test it with no write operations: #mkdir t && mount -t tmpfs -o size=80M none t/ #rm -rf t/* #cpShuffleSature -v3 -t t >& cpsf.tmp #cut -f3 -d: cpsf.tmp|sort|uniq -c|sort -n # profiling: # perl -d:DProf ; dprofpp # perl -d:SmallProf ; perl_smallprof #TODO: new scoring scheme, test and let me know #TODO: seach for TODOs inside the code :-) #MP3 tags: http://developers.slashdot.org/article.pl?sid=04/02/26/1442231 #TODO: boucler en fin de copie car espace dispo a pu changer (ms quit si ce #n'est pas le cas) #TODO: we print lines somewhat randomly :-( #TODO: vérifier que répertoire target créé ssi probabilité raisonnable de #copie possible #TODO: explorer mc-foo, Moosic #TODO: --destroy => détruire fichiers existants sur cible, en fonction de #leur taille et CRC. ne pas détruire puis recopier un fichier => détruire à #mesure de l'espace nécessaire à la copie, en commençant par ceux qui ne se #trouvent le plus loin dans la liste de fichiers copiables. le compteur de #nombre de copies est incrémenté même en cas de copie n nécessaire (cible #déjà présente) #morceau détecté en cible : # - ms n présent en source : warning # - si pas --destroy ou morceau n destructible () et présent en src : # incrémenter compteur de copies use strict; use warnings; #use diagnostics; require 5.008_000; #Linux Debian: libcarp-assert-perl use Carp::Assert; #no Carp::Assert; # line commented out => debug mode # $DEBUGMODE=1 is only for developers because it forbids file copy and # directory creation my $DEBUGMODE=0; use Memoize; use Cwd; use File::Basename 'fileparse'; use File::Copy; use File::Find; use File::Path; use File::Glob ':glob'; use Getopt::Long qw(:config gnu_getopt); use locale; #use Date::Calc qw(Today Delta_Days); use Storable qw(lock_store lock_retrieve); use Digest::MD5 qw(md5 md5_base64); #Linux Debian: libfilesys-diskfree-perl use Filesys::DiskFree; #PARAM: change those if u need default values #TODO: dunno how to initialize this under MS-Windows #TODO: store all dft parms in this file my $opt_hist_file_name = $ENV{"HOME"} . "/.cpShuffleSature_history"; my $opt_pause=0; my $opt_maxplaytime=0; my $opt_prefername; my $opt_exclude; my @SourcePathDft='~/tmp/zik'; my $targetnameDft=qw(/mnt/usb); # running modes: # 1 copy # 2 play (not implemented) my $RunningMode=1; my @SourcePath; my $SourceDirRoot; my $targetname; my $verbose=0; my $dummy=0; my $namesonly=0; my $help=0; my $AvailRemain=0; my $wholedir=0; my $maxbytesindir=0; my $maxbytescopied=0; my $bytescopied=0; my $player='NopePlayer'; my $opt_showhist=0; my $opt_mintime=0; my $opt_days=0; my $TimeAtStartup=time(); my $opt_play=0; my $opt_quiet=0; #3 modes: # 0: use (read and write) # 1: use but don't write (read) # 2: don't use (no read nor write operation) my $opt_ignorehist=0; my $opt_zero=0; my $opt_noremember=0; my $opt_norandom=0; my $opt_maxfiles=0; my $processed_files=0; my $opt_nosubdir=0; # for players that don't read subdirs my $FilNamFilter='\.(MP3|WMA|ASF)$'; #'\.(MP3|OGG|WAV|WM[AVF]?|[JM]P(E)?G|AVI|MOV|VCD|CDA|DIVX|QT|SWF)$'; my $lockindicator='.cpsslock'; my $UnixHost=(-e '/dev/null'); my %copiedSourceDir; my %BytesinDir; # sizes of target directories my @filnames; # names of the selected source files #various informations about each source file copied at least one time #(values are preserved accross runs) my %Histo; #a HoH indexed with the stringfingerprint() of the file name #we do NOT use the file name which may contain strange chars #FileSize #file size #FileFingerprint #file fingerprint (MD5, SHA ?) # attributes used to establish a score, used to decide wether we will copy # it higher score => higher chance to be copied #FileCopiesCounter #counter of successful copies done #FileCopyLastDate #date of the last copy made #magic number of the Histo 'storable' file #TODO: MAGIC: FIXME #my $HistoMagicNumber='a19670728_2'; #we need to initialize those globaly my $histo_average_CopiesCounter=0; #TODO: maybe useless my $histo_above_max_filesize=1; #> length of the biggest file my $histo_average_filesize=0; my $date_of_this_run; my $time_origin=1053885782; # minimal current date (as expressed by time()) #TODO for 'play' (not 'copy') mode: a 'player' option which modifies #%PlayerPrograms (by default: "music123" or equivalent (dunno for #MS-Windows)) GetOptions( "sourcedir|s=s" => \@SourcePath, "target|t=s" => \$targetname, "sourcedirroot|r" => \$SourceDirRoot, "filter|f=s" => \$FilNamFilter, "avail|a=s" => \$AvailRemain, "whole|w" => \$wholedir, "maxbytesindir|m=s" => \$maxbytesindir, "copied|maxsize|c=s" => \$maxbytescopied, "verbose|v:+" => \$verbose, "dummy|d" => \$dummy, "player|p:s" => \$player, "names-only|namesonly|n" => \$namesonly, "showhistory" => \$opt_showhist, "ignorehistory:+" => \$opt_ignorehist, "0" => \$opt_zero, "noremember" => \$opt_noremember, "lockindicator=s" => \$lockindicator, "mintime=s", \$opt_mintime, "days=f" => \$opt_days, "norandom" => \$opt_norandom, "nosubdir:+" => \$opt_nosubdir, "prefername=s" => \$opt_prefername, "exclude=s" => \$opt_exclude, "historyfilename|histfile=s" => \$opt_hist_file_name, "pause=s", \$opt_pause, "maxplaytime=s", \$opt_maxplaytime, "play", \$opt_play, "quiet", \$opt_quiet, "tracks|maxtracks|maxfiles=i", \$opt_maxfiles, "help|h|?" => \$help ); # BEGIN subs sub filefingerprint($) { my $filnam=shift; my ($ctx, $res); open(FILE, $filnam) or die "fingerprint: Can't open '$filnam' ($!)"; binmode(FILE); $ctx = Digest::MD5->new; $ctx->addfile(*FILE); $res = $ctx->digest; close(FILE); return $res; } #sub stringfingerprint($) #{ # return md5_base64(shift); #} # Passed a filename, find all elements of it that are symlinks, and replace # with the symlink destination directory names. Calls itself recursivly until # no more symlinks are left in the filename. # Note that the path this returns may be ugly and have lots of extra /'s and # ..'s and .'s in it. Use GetAbsolutePath to clean it up. Also note that this # only works if it's passed an absolute path to begin with. Therefore, a # typical invocation will be something like: # GetAbsolutePath(DeSymlinkPath(GetAbsolutePath(file))) # author: Joey Hess sub DeSymlinkPath($); sub DeSymlinkPath($) { $_=shift; return '/' if ($_ eq '/'); my $dirty=undef; # set to 1 if we encounter a symlink. my @list=split(m:/:, $_); my $elt; my $a=''; foreach $elt (@list) { if (-l "$a/$elt") { my $b=readlink("$a/$elt"); $dirty=1; if (! ( $b=~m:^/:) ) { # relative symlink, add to current pwd. $a.="/$b"; } else { # absolute symlink, replaces current pwd. $a=$b; } } else { # normal directory or file, add to pwd. $a.="/$elt"; } } if ($dirty) { return DeSymlinkPath($a); } else { # print "DEBUG desymlinkpath: $a\n"; return $a; } } # Passed a filname that may be relative, determine the absolute filename. # So we have to get rid of relative pathnames, and we even have to handle # things like ./../../../usr/X11R5/../X11R6/bin/./foo sub GetAbsolutePath($) { $_=shift; # print "DEBUG : GetAbsolutePath in $_\n"; return '/' if ($_ eq '/'); $UnixHost and s/^~/$ENV{"HOME"}/; # expand tilde in basedir if (! m:^/:) { # doesn't start with / , so is a relative path. my $pwd=getcwd; # chomp $pwd; $_="$pwd/$_"; } s!//+!/!; my @dirlist; my $dir; foreach $dir (split(m:/:, $_)) { if ($dir eq '..') { pop @dirlist; # go down 1 directory. } elsif ($dir ne '.') { push (@dirlist,$dir); } } $_=join('/', @dirlist); return $_; } #TODO: memoize this function (profile it) memoize('GetScore'); sub GetScore($) { my $fnam=shift; my $score; #for a given score the biggest file will more prolly not be copiable #due to a lack of space, especially if some options (-a or -c) are #enabled, therefore we also take the filesize into account affirm { (defined $histo_average_CopiesCounter); } "histo_average_CopiesCounter"; affirm { (defined $Histo{$fnam}); } "Histo cell for $fnam defined"; affirm { (defined $Histo{$fnam}{qw/FileSize/} ); } "Histo FileSize cell for $fnam defined"; affirm { (defined $Histo{$fnam}{qw/FileCopiesCounter/} ); } "Histo FileCopiesCounter cell for $fnam is defined"; affirm { (defined $Histo{$fnam}{qw/FileCopyLastDate/} ); } "Histo FileCopyLastDate cell for $fnam is defined"; #file too big => very low score #this function is memoized => we cannot take $maxbytescopied (which varies #during the run) into account # ($Histo{$fnam}{qw/FileSize/} > $maxbytescopied) and # return -1e5; #'score': probability, for any file, of being copied $score = ( #copied many times => lower score ( $histo_average_CopiesCounter - ( $Histo{$fnam}{qw/FileCopiesCounter/} * $Histo{$fnam}{qw/FileCopiesCounter/})) #recently copied => lower score + ( ($date_of_this_run - $Histo{$fnam}{qw/FileCopyLastDate/}) / #seconds/hour * hours per days 3600 * 24) ); #bigger file => higher score #print "DEBUG kicksiz ($Histo{$fnam}{qw/FileSize/} " . $Histo{$fnam}{qw/FileSize/} / $histo_average_filesize . " score: " . $score . "\n"; #note: negative score => huge files scores will be lowered $score *= 1 + ( $Histo{$fnam}{qw/FileSize/} / $histo_average_filesize ); # print "DEBUGPER1 $score " . # ( int( $Histo{$fnam}{qw/FileSize/} / $histo_above_max_filesize )) # . "\t\t($Histo{$fnam}{qw/FileSize/})\n"; if ( ($opt_prefername) && ($fnam =~ m!$opt_prefername!oi) ) { if ($score<0) { $score=0; } else { $score *= 1.5; } } # print "DEBUG score: $fnam: $score size: $Histo{$fnam}{qw/FileSize/} copies: $Histo{$fnam}{qw/FileCopiesCounter/}\n"; return int($score); } sub shufflit($) { #TODO: we may use %Histo only (throw @filnam away)?? my $deck = shift; # $deck is a reference to an array my $i = @$deck; while ($i--) { my $j = int rand ($i+1); next if $i == $j; if ($opt_ignorehist < 2) { my $scorei = GetScore(@$deck[$i, $j]); my $scorej = GetScore(@$deck[$j, $i]); if ($i < $j) { next if ( (1.1 * $scorei) < $scorej); } else { next if ( $scorei > (1.1 * $scorej) ); } } # print "DEBUG shufflit ($i,$j): permuting @$deck[$i, $j](" # . GetScore(@$deck[$i,$j]). ") and @$deck[$j,$i] (". # GetScore(@$deck[$j,$i]).")\n"; @$deck[$i, $j] = @$deck[$j, $i]; } } my @UnitsTime= ( { 's'=> 1}, { 'm'=> 60}, { 'h' => 3600}, { 'd' => 86400}, { 'w' => 7*86400}, { 'M' => 30*86400}, { 'o' => 30*86400}, { 'q' => 3*30*86400}, # quarter { 'S' => 6*30*86400}, # semester { 'y' => 365.25*86400} ); my @UnitsSize= ( #don't dream {'Y' => 1e24}, { 'E'=> 1e15}, { 'P'=> 1e15}, { 'T' => 1e12}, { 'G' => 1e9}, { 'M' => 1e6}, { 'K' => 1024}, { 'k' => 1e3} ); sub rupt($$$$) { my $str=$_[0]; my $value=$_[1]; shift; shift; my($unitName, $amount)=@_; my $result; ($$value < $amount) and return 0; $result = int($$value / $amount); $$value = $$value % $amount; $$str .= "$result$unitName "; } sub getnumeric($\@) { #groks any simple math expression using units #thx to Emmanuel Manganneau for the programming approach my $str=shift; my $UnitsRef=shift; my @Units=@$UnitsRef; my $initstr=$str; my $total=0; die "Cannot grok $str: unit between digits" if ($str =~ /\d+\p{IsAlpha}+\d+/); # $str =~ s/(\p{IsAlpha}+)(\d+)/$2\*$1/g; $str =~ s/\s+$//g; # zap trailing spaces $str =~ s/\s+/\+/g; # replace all remaining spaces w '+' # print "DEBUG AVANT VU $str\n"; do {} while ($str =~ s/(\d|\p{IsAlpha})(\p{IsAlpha})/$1\*$2/g); # $str =~ s/(\d)(\p{IsAlpha})/$1\*$2/g; # $str =~ s/(\p{IsAlpha})(\p{IsAlpha})/$1\*$2/g; # print "DEBUG 2HOP $str\n"; my $count; my $unit; for $count ( @Units ) { for $unit (keys %$count) { $str =~ s/$unit/$count->{$unit}/g; } } $str =~ s/\+\+/\+/g; # print "DEBUG 3HOP $str\n"; #print "DEBUG EVAL $str\n"; $total = eval $str; #TODO if ($@) { err }; die "Cannot evaluate the numeric value $str (from $initstr)" if ($@); return($total); } sub shownumeric($$) { #TODO pour présentation des scores: # - gérer les valeurs négatives (précédées de '-') # - réduire la précision my $value=shift; my $maxused=shift; # max amount of units used my $used=0; my $str=''; my $count; my $unit; EXTER: for $count ( @UnitsSize ) { for $unit (keys %$count) { # print "$unit $count->{$unit}\n"; if (rupt(\$str, \$value, $unit, $count->{$unit})) { last EXTER if (++$used==$maxused); } } } if ($value && ((!$maxused) || ($used<$maxused)) ) { $str .= $value; } else { chop($str); } $str='0' if ($str eq ''); return $str; } #parms: #FileName, Fingerprint, FileCopiesCounter, FileCopyLastDate sub InitHistoMember($$$$) { my $FileName=shift; # print "DEBUG InitHisto1: $FileName\n"; affirm { -s $FileName; } "The file exists"; #'+.0001' is ridiculous but fixes a subtle bug (maybe in Storable, prolly #related to some type-handling) $Histo{$FileName}{qw/FileSize/} = (-s $FileName)+.0001; $Histo{$FileName}{qw/FileSize/} -= .0001; affirm { (defined $Histo{$FileName}{qw/FileSize/} ); } "InitHistoMember: Histo FileSize cell for $FileName defined"; $Histo{$FileName}{qw/Fingerprint/} = shift; $Histo{$FileName}{qw/FileCopiesCounter/} = shift; $Histo{$FileName}{qw/FileCopyLastDate/} = shift; # print "DEBUG InitHisto2: $Histo{$FileName}{qw/FileSize/} et "; # print -s $FileName; # print "\n"; } #amount of bytes used on the target filesystem for any file creation (even #an empty one) my $MetadataBytesPerFileCreated=64; sub FileSizeNotOKp($$) { my $tmpSourceSiz=-s shift; my $beSilent=shift; if ($bytescopied + $tmpSourceSiz > $maxbytescopied) { # print "DEBUG $source size: $tmpSourceSiz, max: $maxbytescopied copied: $bytescopied\n"; (!$beSilent && $verbose>8) and print "(seems too big)\n"; return 2; } # print"DEBUG must avail: $AvailRemain : " . (-s $elt) . " et " . getfreespace() ." donc ". ( getfreespace() - (-s $elt) ) . "\n"; if ( $AvailRemain > ( getfreespace(0) - ($MetadataBytesPerFileCreated + $tmpSourceSiz )) ) { (!$beSilent && $verbose>8) and print "(may be too big)\n"; return 3; } return 0; } my $lastDFValue=0; # just another global var. this is ugly :-( sub Mycopy($$$$) { my $source = shift; my $targ = shift; my $localmaindirtargetname = shift; my $localdirtargetname = shift; if (FileSizeNotOKp($source, 1)) { #the freespace may vary (because another program uses the #corresponding filesystem) and we prolly can't estimate exactly the #space we took on it, therefore we refresh the variable getfreespace(1); #then we can try again } if (FileSizeNotOKp($source, 0)) { return 10; } #print "\nDEBUG: Mycopy($source, $targ, localmaindirtargetname : $localmaindirtargetname, $localdirtargetname\n"; #if $targ already exists we will not overwrite it #note: $targ may not be in a directory, therefore this test is not #related to -d $localdirtargetname my $tmpSourceSiz=(-s $source); if (-e $targ) { #an existing target is not an abnormal condition (the target fs may already #contain files) if ( (-s $targ != $tmpSourceSiz) && ($verbose > 3) ) { warn "warning: $targ exists but is not a copy of $source, not copied"; } elsif ( ( ($opt_nosubdir) && ($verbose > 8) ) #"--nosubdir" => cpShuffleSature may try to copy two files which have the #same name (but are in different source directories) => an existing target #is less prolly an abnormal condition || ( (!$opt_nosubdir) && ($verbose > 6) ) ) { warn "exists on target ($targ), not copied"; } return 1; } my $targetcompletedir = $localmaindirtargetname . $localdirtargetname; if ($maxbytesindir) { # we must take care of the total size of the target dir content # calculating the size of the target dir if (!defined($BytesinDir{$targetcompletedir})) { $BytesinDir{$targetcompletedir}=diskspaceUsedInDir($targetcompletedir); } if ( ( $BytesinDir{$targetcompletedir} + ($tmpSourceSiz) ) > $maxbytesindir) { # the directory size after copy will be > to the max size of any # directory ($verbose > 5) and print "max size defined for any target directory forbids copy"; return 4; } } if ($dummy) { # dummy mode my $simpledirtargetname = substr($localdirtargetname, 1); ($verbose>4) and print "I will copy it to $simpledirtargetname (approx @{[shownumeric($lastDFValue, 3)]} free)"; } else { # processing of the file if (!$DEBUGMODE) { #TODO #if we are to invoke something (for example a player): # s/`/\\`/gi; # escape backticks to avoid shenanigans with filenames if ($namesonly) { print "$source\n"; } elsif ($RunningMode == 2) { #TODO: play mode (not copy!): # ($opt_pause) and sleep $opt_pause; affirm { 0 } "Player mode not implemented"; } elsif ($RunningMode == 1) { # copy mode if (!copy($source, $targ)) { #copy error! ($verbose > 6) and print " Error: $!\n"; $MetadataBytesPerFileCreated+=64; #TODO: test this: unlink $targ if (-e $targ); getfreespace(1); return 5; } if (! -e $targ) { # don't die because it may be some transient err warn "PROBLEM: unknown error during copy of $source to $targ"; return 6; } if ($tmpSourceSiz != -s $targ) { # it happens (Control-c...) warn "PROBLEM: targetfile $targ size is not equal to sourcefile size"; if ((-f $targ) && (unlink($targ)!=1)) { print STDERR "PROBLEM: moreover the targetfile cannot be deleted\n"; return 7; } return 8; } #this timing is stupid because it ignores the buffercache #we may use File::Sync but it will not be cache-friendly # if ($verbose>8) # { # my $duration=time()-$timer; # (!$duration) and $duration=1; # printf "(%d seconds, %d Bps)\n", $duration, $tmpSourceSiz/$duration; # } ($verbose>6) and print "\ncopied to $targ\n"; } } } # file processed $processed_files++; if ($processed_files==$opt_maxfiles) { #we don't print anything because -n implies 'no verbosity' #print "Max amount of processed files ($opt_maxfiles) reached\n"; #exit. we trap it thru END{} exit 0; } $maxbytesindir and $BytesinDir{$targetcompletedir} += $tmpSourceSiz; $lastDFValue -= $tmpSourceSiz - $MetadataBytesPerFileCreated; # print "DEBUG $bytescopied avant\n"; $bytescopied += $tmpSourceSiz; # print "DEBUG $bytescopied après\n"; if ($opt_ignorehist < 2) { #various post-action things to do affirm { (defined($Histo{$source})); } "Histo cell for $source defined"; affirm { (defined $Histo{$source}{qw/FileSize/} ); } "Histo FileSize cell for $source defined"; my $fgprt=0; if ($dummy || $namesonly) { # we don't want to establish the file fingerprint during a dummy or # namesonly session $fgprt = 'Nopefgprt'; } else { #note: in 'copy' or 'player' mode : if necessary we calculate any file #fingerprint here (just after the copy) because the file content prolly #resides in some memory cache ( ( ($RunningMode==1) or ($RunningMode==2) ) and (!$DEBUGMODE)) and $fgprt = filefingerprint($source); } if ( $Histo{$source}{qw/FileSize/} != $tmpSourceSiz ) { if ($fgprt cmp 'Nopefgprt') { ($verbose > 4) and print " new file (false filesize in history), reseting history data\n"; #TODO: decide wether we reset completely. after all the file #prolly has the same content (song or whatever) InitHistoMember($source, $fgprt, .9 * $histo_average_CopiesCounter, $date_of_this_run); } } elsif ($Histo{$source}{qw/Fingerprint/} eq 'Nopefgprt') #useless because we always init FingerPrint: #|| (! defined($Histo{$source}{qw/Fingerprint/}) { #fingerprint not established $Histo{$source}{qw/Fingerprint/}=$fgprt; } elsif ( $Histo{$source}{qw/Fingerprint/} != $fgprt ) { ($verbose > 4) and print " new file (false fingerprint in history), reseting history data\n"; InitHistoMember($source, $fgprt, .9 * $histo_average_CopiesCounter, $date_of_this_run); } # print "DEBUG Histo bumping FileCopiesCounter for $source\n"; $Histo{$source}{qw/FileCopiesCounter/}++; $Histo{$source}{qw/FileCopyLastDate/}=$date_of_this_run; #garbage collection: we do not keep more than 20 dates # my $nbrefs=$#{$Histo{stringfingerprint($source)}{qw/FileCopiesDates/}}; # while ($nbrefs >= 20) # { # shift @{$Histo{stringfingerprint($source)}{qw/FileCopiesDates/}}; # } # push @{$Histo{stringfingerprint($source)}{qw/FileCopiesDates/}}, time(); # } # else # { # there was no element in %History for this sourcefile # InitHistoMember($source, $fgprt, .9*$histo_average_CopiesCounter, ()); # } } if ($bytescopied >= $maxbytescopied) { ($verbose) and print "Enough bytes copied (-c)\n"; #TODO #exit. we trap it thru END{} exit 0; } #print "DEBUG mycopy return 0\n"; return 0; } sub die_help () { print < $tim) && ( $DFcalls < $callsbeforeflush ) ) . "\n"; if ( $force || ( ( $lastTime + $latency > $tim) && ( $DFcalls < $callsbeforeflush) )) { #print "DEBUG: DF cache\n"; ($dummy) or $DFcalls++; # in dummy mode the cache must not be flushed return $lastDFValue; } #tainting $ENV{PATH} = '/bin:/usr/bin:/usr/local/bin'; $handle = new Filesys::DiskFree; #print "DEBUG: DF NON cache\n"; $handle->df(); # in most cases this oscillates bw max and min values. we need some #my $estimation=$lastDFValue; # latencyMaxIncrement value. # $lastDFValue = $handle->avail($targetname); # $latency = abs($lastDFValue - $estimation) / ($lastDFValue/10); # print "DEBUG latency $latency\n"; # if ($latency<$minlentcy) # { $latency=$minlatency; } # elsif ($latency>$maxlatency) # { $latency=$maxlatency; } $lastDFValue = $handle->avail($targetname); $lastTime = $tim; $DFcalls = 0; return $lastDFValue; } } # end of private static variables sub fileIsreadable($$) { my $filnam = shift; my $verbo = shift; if (! -f $filnam) { ($verbo) and print "$filnam is not a file, skipped\n"; return 0; } if (! -r $filnam) { ($verbo) and print "$filnam is not readable, skipped\n"; return 0; } if (-z $filnam) { ($verbo) and print "$filnam is empty, skipped\n"; return 0; } return 1; } { #private static variables my $dir = shift; my $tempd; sub cumuldir() { $tempd += -s ; } sub diskspaceUsedInDir($) { my $dir=shift; (-d $dir) or return 0; # zap trailing slashes $dir =~ s!/*$!!; $tempd=0; # find (sub {print "DEBUT : "; print ; print " FIN\n" ; $tempd += -s ;}, $dir); File::Find::find( {wanted => \&cumuldir, follow => 1}, $dir); return $tempd; } } #end of private static variables my $lockindicatorregexp; sub SelectFiles() { # my ($dev,$ino,$mode,$nlink,$uid,$gid); (-d $_) and return; affirm { $lockindicatorregexp; } "\$$lockindicatorregexp not established"; #let's ignore some directories # beware: regexp option 'o' used => do not modify $lockindicator if ($File::Find::dir =~ m/$lockindicatorregexp/oi) { ($verbose > 8) and print "$File::Find::dir ignored due to lockindicator\n"; return; } # beware: regexp option 'o' used => do not modify $FilNamFilter! if (!/$FilNamFilter/ois) { ($verbose > 8) and print "$_ filtered out (-f)\n"; return; } if ( ($opt_exclude) && (/$opt_prefername/oi) ) { ($verbose > 8) and print "$_ filtered out (--exclude)\n"; return; } fileIsreadable($_, ($verbose > 6) ) or return; if ( ($opt_mintime) && (defined $Histo{$File::Find::name}{qw/FileCopyLastDate/}) ) { if ($TimeAtStartup - $opt_mintime < $Histo{$File::Find::name}{qw/FileCopyLastDate/}) { ($verbose > 8) and print "$_ filtered out (mintime)\n"; return; } } push @filnames, $File::Find::name; } #sort() helper function (%Histo per score) sub ByScore($$) { my $a = shift; my $b = shift; return (GetScore($b) <=> GetScore($a)); } # END subs # affirm { (getnumeric("M - G/k", @UnitsSize)!=0); }; # affirm { (getnumeric("1G 2k", @UnitsSize)!=1e9+2e3); }; # affirm { (getnumeric("1 2 3", @UnitsSize)!=6); }; # affirm { (getnumeric("1 +2 3", @UnitsSize)!=6); }; # affirm { (getnumeric("M -1", @UnitsSize)!=1e6-1); }; # affirm { (getnumeric("Mk-G", @UnitsSize)!=0); }; # affirm { (getnumeric("M2 - 2kk", @UnitsSize)!=0); }; # affirm { (getnumeric("YM2 - 2kkZk", @UnitsSize)!=0); }; die_help() if ($help || $Getopt::Long::error); ( $dummy && !$verbose ) and $verbose=9; if ($namesonly) { $dummy and die "I cannot simultaneously honour -n and -d"; $verbose and die "I cannot simultaneously honour -n and -v"; } $opt_zero and $opt_ignorehist=2; $opt_noremember and $opt_ignorehist=1; ( ($opt_ignorehist>=2) && $opt_showhist) and die "Cannot completely ignore AND show the history file content"; ($opt_ignorehist>2) and die "Illegal value for ignorehistory"; affirm { $opt_ignorehist<3 && $opt_ignorehist>=0; } "parm opt_ignorehist value"; $opt_mintime and $opt_mintime=getnumeric($opt_mintime, @UnitsTime); if ($opt_days) { if ($opt_mintime) { $opt_mintime+=getnumeric($opt_days . 'd', @UnitsTime); print 'warning: simultaneous use of --days and --mintime. I will cumulate. Result: ' . $opt_mintime; } else { $opt_mintime=getnumeric($opt_days . 'd', @UnitsTime); } } ($opt_mintime<0) and die "--mintime cannot be negative"; ($opt_mintime && $opt_ignorehist) and die "--mintime is not compatible with --ignorehistory"; ( $opt_quiet && (!$opt_play) ) and warn "--quiet implies --play"; ( $opt_pause && (!$opt_play) ) and warn "--pause implies --play"; $opt_maxplaytime and $opt_maxplaytime=getnumeric($opt_days . "d", @UnitsTime); ($opt_maxplaytime<0) and die "maxplaytime cannot be negative"; ($opt_maxfiles<0) and die "maxfiles must be greater than 1"; #needed by diskfree $ENV{LANG} = 'C'; $ENV{DF_BLOCK_SIZE}=''; $ENV{BLOCK_SIZE}=''; #history data loaded? my $histokp=0; my $cntelts=0; if ( ($opt_ignorehist<2) && -e $opt_hist_file_name) { my $Histref; eval { $Histref=Storable::lock_retrieve($opt_hist_file_name) || die "Cannot open the history file"; }; #the Storable class traps the 'damaged file case' $@ and die "I can not use this history file ($opt_hist_file_name, Perl error is: $@)"; if ( defined($Histref)) { $histokp=1; %Histo=%$Histref; #check: version of the 'format' (data structure) used in the history file #TODO: MAGIC: FIXME # if (0) # { # ($Histo{stringfingerprint('cpShuffleSatureMagicNumber')}{qw/FileCopiesCounter/} eq $HistoMagicNumber) or die # "I can not use this version of the history file ($opt_hist_file_name)"; # delete $Histo{stringfingerprint('cpShuffleSatureMagicNumber')}{qw/FileCopiesCounter/}; # delete $Histo{stringfingerprint('cpShuffleSatureMagicNumber')}; # } #let's calculate the arith average number of copies done my $elt; #TODO: use the fastest iterator avail, dunno if foreach/keys is an adequate #couple $histo_average_filesize = 0; $histo_above_max_filesize = -1; $histo_average_CopiesCounter = 0; foreach $elt (keys %Histo) { affirm { (defined $Histo{$elt}); } "Histo cell for $elt defined"; affirm { (defined $Histo{$elt}{qw/FileSize/} ); } "Histo FileSize cell for $elt defined"; affirm { (defined $Histo{$elt}{qw/FileCopiesCounter/} ); } "Histo FileCopiesCounter cell for $elt is defined"; affirm { (defined $Histo{$elt}{qw/FileCopyLastDate/} ); } "Histo FileCopyLastDate cell for $elt is defined"; if ($verbose>7) { if (! -e $elt) { print "$elt (in our history file) does not exists anymore\n"; } elsif ( (-s $elt) cmp $Histo{$elt}{qw/FileSize/}) { printf "$elt (size %f) is not the file described in our history file (%f), we reset the corresponding history description\n", -s $elt, $Histo{$elt}{qw/FileSize/}; InitHistoMember($elt, 'Nopefgprt', .9 * $histo_average_CopiesCounter, 0); } } $histo_average_filesize += $Histo{$elt}{qw/FileSize/} * $Histo{$elt}{qw/FileSize/}; ( $Histo{$elt}{qw/FileSize/} > $histo_above_max_filesize ) and $histo_above_max_filesize=$Histo{$elt}{qw/FileSize/}; $histo_average_CopiesCounter+=$Histo{$elt}{qw/FileCopiesCounter/}; #to obtain the number of elts in an associative array one may use #$nb=keys %AssocArrayName #but right now we use this necessary 'foreach' loop $cntelts++; } if ($cntelts) { $histo_above_max_filesize*=1.1; $histo_average_filesize=sqrt($histo_average_filesize); $histo_average_CopiesCounter = int($histo_average_CopiesCounter/$cntelts); } elsif ($verbose > 3) { warn "empty history file"; } } } if (!$cntelts) { #empty/nonexisting history file $histo_above_max_filesize=1e99; $histo_average_filesize=0.1; $histo_average_CopiesCounter = 0; } affirm { ($histo_above_max_filesize>0) } "variable histo_above_max_filesize"; affirm { ($histo_average_filesize>0) } "variable histo_average_filesize"; $date_of_this_run=time(); if ($opt_showhist) { my $elt; my $fld; my $NamFil; print "Content of the history file ($opt_hist_file_name):\nVCopies\tSize\t\tScore\tFile name\n"; $maxbytescopied=1e99; foreach $elt (sort keys %Histo) { printf "$Histo{$elt}{qw/FileCopiesCounter/}\t%-8s\t%-8s\t%s\n", shownumeric($Histo{$elt}{qw/FileSize/}, 3), shownumeric(GetScore($elt), 3), substr($elt, -40); } print "\nVCopies arith average: $histo_average_CopiesCounter\n"; exit 0; } if (! @SourcePath) { @SourcePath = @SourcePathDft; print "No selection filter provided, using default '@SourcePath'\n" if ($verbose>7); } @SourcePath = split(/,/, join(',', @SourcePath)); if (!$targetname) { if ( ($player eq 'NopePlayer') || !$player ) { $targetname=$targetnameDft; print "No target provided, using default '$targetname'\n" if ($verbose>3); } else { $targetname=$player; print "No target provided, using -the p argument ($player)\n" if ($verbose>3); } } my $newByCop; # new temptative value for $maxbytescopied if ($AvailRemain) { $AvailRemain = int(getnumeric($AvailRemain, @UnitsSize)); ($AvailRemain < 1) and die "illegal value for -a"; $newByCop=getfreespace(0)-$AvailRemain; ($newByCop < 1) and die "-a parameter: not enough space on target (@{[shownumeric(getfreespace(0), 0)]} bytes)"; ($verbose>8) and print "I will let @{[shownumeric($AvailRemain, 0)]} bytes available on target\n"; } if (!$maxbytescopied) { if (defined $newByCop) { $maxbytescopied = $newByCop; } else { $maxbytescopied=getfreespace(0); } } else { $maxbytescopied = int(getnumeric($maxbytescopied, @UnitsSize)); ($maxbytescopied < 1) and die "illegal value for -c"; if ( (defined $newByCop) && ($maxbytescopied > $newByCop) ) { # print "DEBUG $maxbytescopied $newByCop\n"; ($verbose > 3 ) and print "ignoring -c because -a is stated\n"; $maxbytescopied = $newByCop; } } ($verbose > 8) and print "I will process at most @{[shownumeric($maxbytescopied, 0)]} bytes from source\n"; if ($maxbytesindir) { $maxbytesindir=int(getnumeric($maxbytesindir, @UnitsSize)); ($verbose>8) and print "Max size of a directory: $maxbytesindir\n"; ($maxbytesindir =~ m/[^\d]/) and die "-m: non-numeric value"; ($maxbytesindir < 1) and die "-m: cannot use this value"; } if ($player cmp 'NopePlayer') { # -p used # this test is probably sufficient $UnixHost or die "-p is only useful under Unix"; if (!$player) { # no argument to -p if ($targetname) { # -t argument provided $player = $targetname; print "no mounting point provided, using -t argument '$player'\n" if ($verbose>7); } # GetOpt takes care of this: # else # { die "-t option: argument needed;" } } elsif (!$targetname) { # no argument to -t $targetname=$player; print "no target provided, using -p argument '$player'\n" if ($verbose>5); } print "mounting $player\n" if ($verbose>8); if (!$dummy) { #tainting $ENV{PATH} = '/bin:/usr/bin:/usr/local/bin'; if (system("mount $player")) { print STDERR "PROBLEM: mount $player FAILED\n"; } } } $targetname=GetAbsolutePath(DeSymlinkPath(GetAbsolutePath($targetname))); (-d $targetname) or die "Target dir ($targetname) does not exists"; $SourceDirRoot= (! defined $SourceDirRoot) ? 1 : 0; # establish $lockindicatorregexp here for use in SelectFiles $lockindicatorregexp=$lockindicator.'$'; $lockindicatorregexp =~ s/\./\\./; #print "DEBUG \ntarget: $targetname, filtre: $FilNamFilter, usrpathfilt: $SourceDirRoot, remain: $AvailRemain, whole: $wholedir, maxbyt: $maxbytesindir, verb: $verbose\n"; #exit; # go! affirm {$maxbytescopied;} "maxbytescopied variable OK"; my $SourceDir; foreach $SourceDir (@SourcePath) { # we must convert $SourceDir into an absolute path $SourceDir=GetAbsolutePath(DeSymlinkPath(GetAbsolutePath($SourceDir))); print "\nselecting through $SourceDir by '$FilNamFilter'\n" if ($verbose>6); File::Find::find( {wanted => \&SelectFiles, no_chdir => 1, follow => 1, follow_skip => 2}, $SourceDir); my $directoriescreated; if ($#filnames<1) { ($verbose>6) and print "warning: no files selected\n"; next; } # %Histo must contain all @filnam members my $elt; foreach $elt (@filnames) { if (! defined $Histo{$elt}{qw/FileSize/} ) { InitHistoMember($elt, 'Nopefgprt', 0, 0); } # else{ print "DEBUG: $elt existe dans Histo\n"; } } # foreach $elt (keys %Histo) # { # print $elt; # print $Histo{$elt}{qw/FileSize/}; # print $Histo{$elt}{qw/Fingerprint/}; # print $Histo{$elt}{qw/FileCopiesCounter/}; # print $Histo{$elt}{qw/FileCopyLastDate/}; # } #do NOT delete %Histo members which are not in @filnam! #the fact that they have not been selected does not imply that we have to #forget their informations @filnames = sort ByScore @filnames; #let's shuffle (!$opt_norandom) and shufflit(\@filnames); # foreach $elt (@filnames) # { # print "$elt " . GetScore($elt) . #" siz: $Histo{$elt}{qw/FileSize/}, fgpr: $Histo{$elt}{qw/Fingerprint/}, cop: $Histo{$elt}{qw/FileCopiesCounter/}, dat: $Histo{$elt}{qw/FileCopyLastDate/}\n"; # } my ($repname, $targetrepname,$tocopy, $resul, $filename, $fileextension); #we will: #zamine each filename 0 or 1 time #get the free disk space nearly each time because other softwares can # access to volume stores the target dir foreach $elt (@filnames) { #already donne (during selection) #next if (!fileIsreadable($elt, ($verbose > 6))); affirm { -r $elt; } "file existence"; print "\n$elt " if ( $verbose > 2 ); # fetch the directory part ($filename, $repname, $fileextension) = fileparse($elt, '[.].+?'); if ($SourceDirRoot) { # zap $SourceDir from the name component # beware: regexp option 'o' used => do not modify $SourceDir my $protec = '$repname =~ s!$SourceDir!!o;'; eval $protec; #TODO if ($@) { err }; } if ($opt_nosubdir) { $targetrepname="//"; } else { $targetrepname=$repname; } my $suffix = ""; if ($opt_nosubdir > 1 ) { $suffix = $repname; $suffix =~ s/^\///; # Pull off leading '/' $suffix =~ s/\//___/g; # Change any '/' to '___' $suffix =~ s/^/\//; # Put a leading '/' back on $tocopy = $targetname . $suffix . $filename . $fileextension; } else { $tocopy = $targetname . $targetrepname . $filename . $fileextension; } # let's zap the '/' ending $repname my $tmpr=$targetrepname; chop($tmpr); if (-d $targetname . $tmpr . $lockindicator) { ($verbose > 8) and print "$targetname$tmpr$lockindicator/ (lockindicator) exists\n"; next; } if ( ! -d $targetname . $targetrepname ) { # the target dir does not exists affirm { (!$copiedSourceDir{$SourceDir.$repname}); } "not in the list of already processed source directories"; if ( $maxbytesindir && ( (-s $elt) > $maxbytesindir) ) { # the file size is > to the max size in any directory ($verbose > 5) and print "max size defined for any target directory forbids copy"; next; } # let's create the target dir if (!$dummy && !$DEBUGMODE) { print "\n" if ($verbose > 3); eval { $directoriescreated = mkpath($targetname . $targetrepname, ($verbose > 7) ); }; $directoriescreated || die "\nError while creating a directory, aborting. error: $@"; $maxbytesindir and $BytesinDir{$targetname . $targetrepname}=0; # printf "\nDEBUG now mkpath $targetname$repname (%d reps)\n", $directoriescreated; $lastDFValue+= $directoriescreated * 512; } } else { $directoriescreated=0; } # processing $resul=99; # this means "for now there was no file copied" # if -w if ($wholedir) { # if directory not already explored for processing if (!exists $copiedSourceDir{$SourceDir.$repname}) { # we now copy all files in the src directory # at this stage all target directories are created # print "DEBUG : wholedir sélection file $elt (en $repname)\n"; my (@filn, $f, $localtocopy, $localfilename, $localfileextension); @filn = glob($SourceDir.$repname.'*'); #some players read in copy order (often dir area of a VFAT #filesystem). therefore we must sort alphabetically the list #of files of a directory before beginning the processing @filn = sort @filn; foreach $f (@filn) { # print "DEBUG : wholedir copie $f\n"; # the FilNamFilter applies when copying a whole dir ('-w' option) ($f =~ /$FilNamFilter/is) or next; #already donne (during selection) #next if (!fileIsreadable($elt, ($verbose > 6))); my $useless; ($localfilename, $useless, $localfileextension) = fileparse($f, '[.].+?'); if ($opt_nosubdir > 1) { $localtocopy = $targetname . $suffix . $localfilename . $localfileextension; } else { $localtocopy = $targetname . $targetrepname . $localfilename . $localfileextension; } print "\n-w enabled: copying $f\n" if ($verbose>8); #print "DEBUG copy wholedir: (localtocopy: $localtocopy), f: $f, localcopy: $localtocopy, targetname: $targetname, repname: $repname\n"; # ONE single file copied is sufficient to declare that the # copy did not fail and in such a case $result must contain 0 $resul = Mycopy($f, $localtocopy, $targetname, $targetrepname); #print "DEBUG appel mycopy1: $resul\n"; } $copiedSourceDir{$SourceDir.$repname}=1; } } # if wholedir else { $resul=Mycopy($elt, $tocopy, $targetname, $targetrepname); #print "DEBUG appel mycopy2: $resul\n"; } #printf "\nDEBUG resul: $resul and $directoriescreated\n"; if ($resul && $directoriescreated) { # we could not copy the file # the created directories are empty, let's try to delete them print "\ncopy failed (code: $resul), deleting the target directory\n" if ($verbose>3); # rmtree deletes only the last directory of its argument my $rep=$directoriescreated; my $strr=$targetname . $targetrepname; while ( $rep>=0 ) { #print "DEBUG rmtree $strr ($rep)\n"; $rep--; rmtree($strr, ($verbose > 7)); $strr =~ s!(.*)/[^/]*!$1!; } $lastDFValue -= $directoriescreated * 512; } } } #no more elts print "\nno more files to copy\n" if $verbose; #if ($verbose > 8) #{ #TODO: various stats #bytes copied, disk space used ... #TODO #detection of redundant source files (same size and fingerprint) #beware: same fingerprint may also reveal file displacement or renaming #} END { if ($player cmp 'NopePlayer') { print "unmounting $player\n" if ($verbose>8); if ( !$dummy ) { #tainting $ENV{PATH} = '/bin:/usr/bin:/usr/local/bin'; if (system("umount $player")) { print STDERR "PROBLEM: umount $player FAILED\n"; } } } if (!$opt_ignorehist && !$dummy) { #save the history file #purge the History by deleting all %History members never copied my $elt; foreach $elt (keys %Histo) { #delete Histo elements which were not used # if ( # ( $Histo{$elt}{qw/FileCopiesCounter/} == # .9 * $histo_average_CopiesCounter) ) # { # delete $Histo{$elt}; # next; # } #TODO #if the file does not exists anymore and the last copy is more #than 3 month old we may delete the corresponding element in $Histo } #establish: version of the 'format' (data structure) used in the history #file, used as a 'local magic data' #we do not init this via InitHistoMember because this function tries to get #the file size #TODO: MAGIC: FIXME # if (0) { # $Histo{'cpShuffleSatureMagicNumber'}{qw/FileCopiesCounter/} # = $HistoMagicNumber; # } #we lose data after a violent break #=> store in tmp file, check save then mv to the final file my $tmpfilename = "$opt_hist_file_name.tmp"; #TODO in order to trap errors: evaluate this Storable::lock_store(\%Histo, $tmpfilename) or die "can't store the temporary history file"; if (-e $opt_hist_file_name) { # there is already a history file (read at program startup) # let's rename it w the ".old" suffix if (-e "$opt_hist_file_name.old") { unlink "$opt_hist_file_name.old" or die "cannot delete the old history file ($opt_hist_file_name.old)"; } if (!rename($opt_hist_file_name, "$opt_hist_file_name.old")) { die "cannot rename the existing history file"; } } if (!rename($tmpfilename, $opt_hist_file_name)) { # failure: we could not rename the tmp file if (-e "$opt_hist_file_name.old") { # there is a old one, let's restore it if (-e "$opt_hist_file_name.old") { warn "cannot restore the old history file" if (!rename("$opt_hist_file_name.old", $opt_hist_file_name)); } die "can't rename the temporary history file"; } } if (-e "$opt_hist_file_name.old") { unlink "$opt_hist_file_name.old" or warn "cannot delete the old history file"; } affirm { -e $opt_hist_file_name; } "the history file exists"; # my $Histref; # $Histref=Storable::retrieve($opt_hist_file_name); # my %HistoDeux=%$Histref; # #TODO: MAGIC: FIXME # #delete $HistoDeux{'cpShuffleSatureMagicNumber'}; # foreach $elt (sort keys %Histo) # { # ($Histo{$elt}{qw/FileSize/} != $HistoDeux{$elt}{qw/FileSize/}) and # print "FileName $elt\n"; # ($Histo{$elt}{qw/FileCopiesCounter/} != # $HistoDeux{$elt}{qw/FileCopiesCounter/}) and # print "VCopies $elt\n"; # } # foreach $elt (sort keys %HistoDeux) # { # ($Histo{$elt}{qw/FileCopiesCounter/} != # $HistoDeux{$elt}{qw/FileCopiesCounter/}) and # print "VCopies $elt\n"; # } } } exit 0;