Use CSV as it's much quicker and easier than scraping HTML :-)

This commit is contained in:
Matthew Slowe 2023-10-20 08:04:04 +01:00
parent fdcd4e5fe1
commit 39d1a5e328

51
csv.pl Normal file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env perl
#
# Fetch readings for a fixed list of stations as CSV over HTTPS and store
# them in the "levels" table of the SQLite database rivers.db (opened
# relative to the current working directory).
#
# Environment:
#   DEBUG    when set, STDOUT is unbuffered
#   R_DEBUG  when set, progress messages are printed (STDOUT is unbuffered
#            as well, so they appear immediately)

# Warnings are enabled with the pragma rather than "-w" on the shebang line:
# Linux hands everything after the interpreter path to env as ONE argument,
# so "env perl -w" would look for a program literally named "perl -w".
use strict;
use warnings;
use diagnostics;
use DBI;
use Text::CSV;
use Date::Manip;
use LWP::UserAgent;

# The rest of the script keys its debug output on R_DEBUG, so honour it
# here too; DEBUG is still accepted for backward compatibility.
$| = 1 if $ENV{DEBUG} || $ENV{R_DEBUG};

# HTTP client shared by every station request.
my $ua = LWP::UserAgent->new;
$ua->agent("Munin[river_levels_]");

# Station IDs; each one is appended to $base to form the request URL and is
# stored as the stationid column value.
my @stations = qw(
    1129 1135 1133 1143 1132 1137 1134 1145 1139 1131 1130 1144 9274
);
my $base = "https://check-for-flooding.service.gov.uk/station-csv/";

my $db = DBI->connect("dbi:SQLite:dbname=rivers.db") or die $DBI::errstr;

# REPLACE (rather than INSERT) so that re-running over rows that already
# exist does not fail -- presumably (stationid, timestamp) is a unique key;
# TODO confirm against the schema.
my $recordlevel = $db->prepare(
    "REPLACE INTO levels (stationid, timestamp, level) VALUES (?,?,?)"
) or die $db->errstr;

# Newest timestamp already stored for one station.
my $getlastrecord = $db->prepare(
    "SELECT MAX(timestamp) FROM levels WHERE stationid=?"
) or die $db->errstr;
# getlatest($id)
#
# Runs the prepared $getlastrecord query for the given station id and returns
# the MAX(timestamp) value it yields. When the query produces no value (or a
# false one), the string "0" is returned instead.
sub getlatest($) {
    my $station_id = shift;

    $getlastrecord->execute($station_id);
    my @row = $getlastrecord->fetchrow();

    # An empty result or a NULL aggregate leaves $row[0] undefined, which
    # falls through to the "0" default.
    return $row[0] || "0";
}
# For every configured station: download its CSV, then store each reading
# whose timestamp is newer than the latest one already held for that station.
# All progress messages go to STDERR and are only printed when R_DEBUG is set.
foreach my $station (@stations) {
    print STDERR "[$station] Requesting $base$station\n" if $ENV{R_DEBUG};
    my $req = HTTP::Request->new(GET => $base . $station);
    my $res = $ua->request($req);

    # Never parse an error response as CSV: its lines would otherwise be
    # written to the levels table as bogus readings.
    unless ($res->is_success) {
        warn "[$station] Request failed: " . $res->status_line . "\n";
        next;
    }

    my $content = $res->content;
    my $last    = getlatest($station);
    print STDERR "[$station] Last record was $last\n" if $ENV{R_DEBUG};

    # Lines are handled in string-sorted order (presumably oldest first,
    # assuming the timestamps sort lexically -- TODO confirm).
    foreach my $line (sort split(/^/m, $content)) {
        # \Q...\E makes the parentheses literal. Unquoted they form a capture
        # group, so the pattern would look for "Timestamp UTC", never match
        # the header, and let the header row be stored as a reading.
        next if $line =~ /\QTimestamp (UTC)\E/;

        # Strip LF and CRLF endings alike so no "\r" ends up in the level.
        $line =~ s/[\r\n]+\z//;

        my ($ts, $level) = split(/,/, $line);

        # Ignore blank or incomplete lines instead of storing empty fields.
        next unless defined $ts    && length $ts;
        next unless defined $level && length $level;

        # String comparison, matching how $last is obtained; relies on the
        # timestamp format ordering correctly as text.
        if ($ts le $last) {
            print STDERR "[$station] Skipping $level at $ts\n" if $ENV{R_DEBUG};
        }
        else {
            print STDERR "[$station] Recording $level at $ts\n" if $ENV{R_DEBUG};
            $recordlevel->execute($station, $ts, $level);
        }
    }
}