use strict;
-use Cwd;
-use Date::Parse;
-use Date::Format;
+use File::stat;
use Getopt::Long;
+use HTTP::Status;
use LWP::UserAgent;
use Time::Local;
+use Date::Parse;
+use Date::Format;
+
+# This has name conflicts with Date::Parse and
+# Date::Format; don't import its names.
+use HTTP::Date ();
+
##################
# Global variables
##################
# LWP user agent for fetching files
# keep_alive is important, to avoid the overhead of
# establishing a new connection for each file we fetch
-my $ua = LWP::UserAgent->new(agent => "MirMirror/0.1",
- keep_alive => 1);
+my $ua = new LWP::UserAgent( agent => "MirMirror/0.1",
+ keep_alive => 1 );
###############
print STDERR " as $localfile" if $localfile;
}
- my $req = HTTP::Request->new(GET => "$remotefile");
- # TODO: If-Modified-Since
- my $resp = $ua->request(HTTP::Request->new(GET => "$remotefile"));
- if ($resp->is_success) {
+ my $req = new HTTP::Request(GET => "$remotefile");
+
+ if ($localfile and -e $localfile) {
+ # Don't fetch unless more recent than local copy
+ my $stat = stat($localfile);
+ $req->header("If-Modified-Since" => HTTP::Date::time2str($stat->mtime));
+ }
+ my $resp = $ua->request($req);
+ if ($resp->is_success) { # 2xx codes
+ my $mtime = HTTP::Date::str2time($resp->header("Last-Modified"));
if ($localfile) {
- print STDERR " -> success\n" if $verbose;
+ if ($verbose) {
+ print STDERR " -> success";
+ print STDERR "; mtime ".time2str("%c", $mtime) if $mtime;
+ print STDERR "\n";
+ }
ensureDir(dirPart($localfile));
open (LOCAL, ">", "$localfile") or die "Can't open $localfile for writing ($!)";
print LOCAL $resp->content or die "Error writing $localfile ($!)";
close LOCAL or die "Error writing $localfile ($!)";
- # TODO: set mtime from Last-Modified
+
+ if ($mtime) {
+ utime $mtime, $mtime, $localfile;
+ }
}
return $resp->content;
}
+ elsif ($resp->is_redirect) { # 3xx codes
+ if ($resp->code == RC_NOT_MODIFIED) { # 304
+ print STDERR " -> not modified\n" if $verbose;
+ open (LOCAL, "<", "$localfile") or die "Can't open $localfile ($!)";
+ local $/; # slurp whole file
+ my $content = <LOCAL>;
+ close LOCAL;
+ return $content;
+ }
+ print STDERR " -> redirect (".$resp->code.")\n" if $verbose;
+ die "Can't fetch $remotefile (got redirect, not yet handled)";
+ }
else {
- print STDERR " -> failed\n" if $verbose;
+ print STDERR " -> failed (".$resp->code.")\n" if $verbose;
die "Can't fetch $remotefile (".$resp->status_line.")";
}
}
}
# get the changes files
-my @changesfilecontent;
-foreach my $file (@changesfiles) { push @changesfilecontent, getChangesFile($file); }
+my %changesfilecontent;
+foreach my $file (@changesfiles) { $changesfilecontent{$file} = getChangesFile($file); }
# if the file has not changed (response code 304) then ignore it
# iterate over all the fetched files, building up a list of files
# to fetch/delete
my %files;
-foreach my $changes (@changesfilecontent) {
- my @changes = split /[\r\n]+/, $changes;
+foreach my $changes (@changesfiles) {
+ my $date = $changes;
+ $date =~ s{^(?:.*/)?changes([0-9]+)\.txt$}{$1} or die "Can't extract date from changes filename $changes";
+
+ my @changes = split /[\r\n]+/, $changesfilecontent{$changes};
foreach my $change (@changes) {
my ($time, $op, $path) = split ' ', $change;
+
+ # Ignore malformed lines, especially wacky paths that could be malicious
+ if ($time =~ /[^0-9:]/) {
+ die "Invalid time $time in $changes";
+ }
+
+ if ($path =~ m{(?:^|/)\.\.(?:/|$)}) {
+ die "Invalid path $path (contains ..) in $changes";
+ }
+
# Strip scheme and host from absolute URLs
$path =~ s{^[a-z]+://[a-z0-9\-\.]+/}{/};
- # TODO: Ignore changes prior to $timeoflastupdate
- # TODO: Ignore malformed lines, especially wacky paths that could be malicious
+
+ # Combine time with date and parse
+ $time = str2time("$date $time");
+ if (not defined $time) {
+ die "Failed to parse datetime '$date $time'";
+ }
+
+ # Ignore changes prior to $timeoflastupdate
+ next if $time < $timeoflastupdate;
+
$files{$path} = $op;
print STDERR "Marked $path as '$op'\n" if $verbose;
}