use Cwd;
use Date::Parse;
use Date::Format;
+use Getopt::Long;
use LWP::UserAgent;
use Time::Local;
##################
# by default, this is not a test
-my $istest = 0;
+my $verbose = 0;
# by default, this is the working directory
-my $workingdir = getcwd();
+my $workingdir = ".";
# get the time and date - using UTC
my $timenow = time();
# otherwise we do our normal processing
my $listchangefiles = 0;
-my $outputfile;
-
# The root URL to fetch files from
my $remoteroot;
# process command line arguments
################################
-foreach my $argnum (0 .. $#ARGV) {
- my $argument = $ARGV[$argnum];
- if ($argument eq "--test") {
- $istest = 1;
- } elsif ($argument =~ /^--workingdir=(.*)$/) {
- $workingdir = $1;
- } elsif ($argument =~ /^--now=(.*)$/) {
- $timenow = str2time($1);
- } elsif ($argument =~ /^--output=(.*)$/) {
- $outputfile = $1;
- } elsif ($argument =~ /^--remoteroot=(.*)$/) {
- $remoteroot = $1;
- } elsif ($argument =~ /^--list-change-files/) {
- $listchangefiles = 1;
- }
+sub usage() {
+ print STDERR <<EOF
+Usage: update.pl [options]
+
+ --remoteroot=url Set the remote root URL to mirror from (mandatory)
+ --workingdir=path Set the root of the local mirror (default ".")
+ --now=timestring Pretend it's currently the specified time
+ --list-change-files Just list the change files we would fetch
+ --verbose Run in verbose mode
+ --help Output this help
+
+EOF
+ ;
+ exit 1;
}
+GetOptions( "verbose!" => \$verbose,
+ "workingdir=s" => \$workingdir,
+ "now=s" => sub($) { $timenow = str2time($_[0]); },
+ "remoteroot=s" => \$remoteroot,
+ "list-change-files!" => \$listchangefiles,
+ "help" => sub() { usage(); } );
+
+usage() if not defined $remoteroot;
+
# the directory where all files with state are kept
my $statedir = "$workingdir/state";
if (! -e $statedir) {
keep_alive => 1);
-##################
-# the program flow
-##################
-
-# first work out when the last time we were up to date is and
-# find present time.
-my $timeoflastupdate = findLastUpdateTime();
-
-if ($istest) {
- print STDERR "timenow is ".time2str("%c",$timenow)." \n";
- print STDERR "timeoflastupdate is ".time2str("%c",$timeoflastupdate)." \n\n";
-}
-
-# Now we know which days' changes we need to get from the server
-my @changesfiles = getChangesFileList();
-if ($istest or $listchangefiles) {
- foreach my $file (@changesfiles) { print STDERR "using changes file $file\n"; }
- exit 0 if $listchangefiles;
-}
-
-# get the changes files
-my @changesfilecontent;
-foreach my $file (@changesfiles) { push @changesfilecontent, getChangesFile($file); }
-
-# if the file has not changed (response code 304) then ignore it
-
-# iterate over all the fetched files, building up a list of files
-# to fetch/delete
-my %files;
-foreach my $changes (@changesfilecontent) {
- my @changes = split /[\r\n]+/, $changes;
- foreach my $change (@changes) {
- my ($time, $op, $path) = split ' ', $change;
- # TODO: Ignore changes prior to $timeoflastupdate
- # TODO: Ignore malformed lines, especially wacky paths that could be malicious
- $files{$path} = $op;
- print STDERR "Marked $path as '$op'\n" if $istest;
- }
-}
-
-# Fetch all files whose last operation was "add" or "change"
-# Delete all files whose last operation was "delete"
-while (my ($file, $op) = each %files) {
- if ($op eq "delete") {
- if (-e "$workingdir/$file") {
- # delete: if the file exists, remove it
- print STDERR "deleting $workingdir/$file\n" if $istest;
- unlink "$workingdir/$file" or die "Can't delete $workingdir/$file ($!)";
- }
- else {
- print STDERR "not deleting $workingdir/$file beacuse it doesn't exist\n" if $istest;
- }
- }
- elsif ($op eq "add" or $op eq "change") {
- # add/change: re-fetch the file
- my $content = fetchFile("$remoteroot/$file","$workingdir/$file");
- }
- else {
- die "Unknown operation '$op'";
- }
-}
-
-
-# update the last "up-to-date" time
-saveLastUpdateTime() unless $istest;
-
-# finish
-exit 0;
-
###############
# SUBROUTINES #
###############
}
# return an array of filename
-sub getChangesFileList()
+sub getChangesFileList($$)
{
+ my ($fromtime, $totime) = @_;
my @files;
- for (my $time = str2time(time2str("%Y:%m:%dT00:00:00", $timeoflastupdate));
- $time < $timenow;
+ for (my $time = str2time(time2str("%Y:%m:%dT00:00:00", $fromtime));
+ $time < $totime;
$time += 86400) {
push @files, time2str("changes%Y%m%d.txt", $time);
}
# get the directory part of a filename
sub dirPart($) {
my $dir = $_[0];
- $dir =~ s{/[^/]*$}{};
+ $dir =~ s{/+[^/]*$}{};
return $dir;
}
+# ensure the given directory exists (like mkdir -p)
+sub ensureDir($) {
+ my $dir = $_[0];
+ if (! -e $dir) {
+ my $parent = dirPart($dir);
+ &ensureDir($parent) if ($parent);
+ mkdir $dir or die "Can't create directory $dir ($!)";
+ }
+}
+
# get a file, optionally saving it locally
sub fetchFile($;$) {
my ($remotefile, $localfile) = @_;
- if ($istest) {
+ if ($verbose) {
print STDERR "fetching $remotefile";
print STDERR " as $localfile" if $localfile;
}
my $resp = $ua->request(HTTP::Request->new(GET => "$remotefile"));
if ($resp->is_success) {
if ($localfile) {
- print STDERR " -> success\n" if $istest;
- my $localdir = dirPart($localfile);
- if (! -e $localdir) {
- mkdir $localdir or die "Can't create directory $localdir ($!)";
- }
+ print STDERR " -> success\n" if $verbose;
+ ensureDir(dirPart($localfile));
open (LOCAL, ">", "$localfile") or die "Can't open $localfile for writing ($!)";
print LOCAL $resp->content or die "Error writing $localfile ($!)";
return $resp->content;
}
else {
- print STDERR " -> failed\n" if $istest;
+ print STDERR " -> failed\n" if $verbose;
die "Can't fetch $remotefile (".$resp->status_line.")";
}
}
sub getChangesFile($) {
return fetchFile($changesroot."/".$_[0], $changesdir."/".$_[0]);
}
+
+
+
+##################
+# the program flow
+##################
+
+# first work out when the last time we were up to date is and
+# find present time.
+my $timeoflastupdate = findLastUpdateTime();
+
+if ($verbose) {
+ print STDERR "timenow is ".time2str("%c",$timenow)." \n";
+ print STDERR "timeoflastupdate is ".time2str("%c",$timeoflastupdate)." \n\n";
+}
+
+# Now we know which days' changes we need to get from the server
+my @changesfiles = getChangesFileList($timeoflastupdate, $timenow);
+if ($verbose or $listchangefiles) {
+ foreach my $file (@changesfiles) { print STDERR "using changes file $file\n"; }
+ exit 0 if $listchangefiles;
+}
+
+# get the changes files
+my @changesfilecontent;
+foreach my $file (@changesfiles) { push @changesfilecontent, getChangesFile($file); }
+
+# if the file has not changed (response code 304) then ignore it
+
+# iterate over all the fetched files, building up a list of files
+# to fetch/delete
+my %files;
+foreach my $changes (@changesfilecontent) {
+ my @changes = split /[\r\n]+/, $changes;
+ foreach my $change (@changes) {
+ my ($time, $op, $path) = split ' ', $change;
+ # Strip scheme and host from absolute URLs
+ $path =~ s{^[a-z]+://[a-z0-9\-\.]+/}{/};
+ # TODO: Ignore changes prior to $timeoflastupdate
+ # TODO: Ignore malformed lines, especially wacky paths that could be malicious
+ $files{$path} = $op;
+ print STDERR "Marked $path as '$op'\n" if $verbose;
+ }
+}
+
+# Fetch all files whose last operation was "add" or "change"
+# Delete all files whose last operation was "delete"
+while (my ($file, $op) = each %files) {
+ if ($op eq "delete") {
+ if (-e "$workingdir/$file") {
+ # delete: if the file exists, remove it
+ print STDERR "deleting $workingdir/$file\n" if $verbose;
+ unlink "$workingdir/$file" or die "Can't delete $workingdir/$file ($!)";
+ }
+ else {
+ print STDERR "not deleting $workingdir/$file beacuse it doesn't exist\n" if $verbose;
+ }
+ }
+ elsif ($op eq "add" or $op eq "change" or $op eq "Modification") {
+ # add/change: re-fetch the file
+ my $content = fetchFile("$remoteroot/$file","$workingdir/$file");
+ }
+ else {
+ die "Unknown operation '$op'";
+ }
+}
+
+
+# update the last "up-to-date" time
+saveLastUpdateTime();
+
+# finish
+exit 0;
+
+