proof of concept checker

This commit is contained in:
Matthew Slowe 2020-05-30 15:25:18 +01:00
parent a150c749e1
commit 8f07a10c7f
4 changed files with 157 additions and 0 deletions

2
checker/Dockerfile Normal file
View File

@ -0,0 +1,2 @@
# Runtime image for the checker scripts: Alpine plus Perl and the Perl
# packages for DBD::SQLite, File::Slurp, YAML (libyaml) and JSON.
# NOTE(review): defs2db.pl loads YAML::Any and check.sh calls elinks; neither
# is named explicitly below - confirm they are available in the image.
FROM alpine
# --no-cache fetches the package index on the fly instead of storing it in the
# image layer, which keeps the image smaller.
RUN apk add --no-cache perl perl-dbd-sqlite perl-file-slurp perl-yaml-libyaml perl-json

63
checker/check.pl Executable file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env perl
# Proof-of-concept checker: reads text from the files named on the command
# line (or STDIN), looks every whitespace-separated word up in defs.db and
# prints a CSV report of the words that were found.
use warnings;
use strict;
use Data::Dumper;
my $DEBUG = 0;
# Base URL that the report prepends to a definition's slug.
my $URL_BASE = 'https://www.selfdefined.app/definitions';
use DBI;
# defs.db is opened relative to the current working directory.  RaiseError
# makes every later DBI failure fatal.
my $db = DBI->connect('DBI:SQLite:dbname=defs.db', '', '', { RaiseError => 1 })
    or die $DBI::errstr;
# Case-insensitive exact match.  LIKE must not be used here: the bound value
# is a raw token from the scanned text, and a token containing % or _ would be
# interpreted as a wildcard pattern and match unrelated rows.
my $p_lookup = $db->prepare('SELECT word, ref FROM words WHERE word = ? COLLATE NOCASE');
# Per-word cache, keyed by the lower-cased word.  Entries carry {count}
# (occurrences seen), {ref} (value of the words.ref column on a hit) and
# {NO} (set when the word is not in the words table).
my %words;
# lookup($word)
#
# Record one occurrence of $word in %words, keyed by the lower-cased word.
# The first time a word is seen it is queried through $p_lookup: a hit starts
# the {count} and stores the row's `ref` column in {ref}; a miss marks the
# entry with {NO}.  Every later occurrence only increments {count}.
#
# When $ENV{PROGRESS} is set, one character per call is written to STDERR:
# '+' first hit, '=' repeated hit, '.' miss (first or repeated).
#
# Returns nothing.
sub lookup($) {
    my $word = shift;
    my $key  = lc $word;

    # Already classified: just count it.
    if (my $seen = $words{$key}) {
        $seen->{count}++;
        print STDERR ($seen->{NO} ? '.' : '=') if $ENV{PROGRESS};
        return;
    }

    $p_lookup->execute($word);
    my $row = $p_lookup->fetchrow_hashref;
    # Only the first row is ever used, so release the statement right away.
    $p_lookup->finish;

    if ($row) {
        # Keep count and ref under the SAME key.  Keying the ref by the
        # database's spelling of the word could create a second entry that
        # has a ref but no count whenever the two spellings differ.
        $words{$key}{count}++;
        $words{$key}{ref} = $row->{ref};
        print STDERR '+' if $ENV{PROGRESS};
        return;
    }

    $words{$key}{NO}++;
    print STDERR '.' if $ENV{PROGRESS};
    return;
}
# Process input: feed every whitespace-separated word of every input line
# (files named in @ARGV, or STDIN when there are none) to lookup().
while (my $line = <>) {
    # split ' ' skips leading whitespace.  split /\s+/ would produce an empty
    # first field for every indented line, and that empty string would be
    # looked up and counted like a real word.
    foreach my $word (split ' ', $line) {
        lookup($word);
    }
}
# Report: print a CSV header followed by one row per word in %words that is
# not marked {NO}.  Rows are emitted in sorted word order so that repeated
# runs over the same input produce identical output.

# Render one CSV field: undef becomes the empty string, and a value that
# contains a comma, double quote or line break is wrapped in double quotes
# with embedded quotes doubled.
my $csv_field = sub {
    my ($value) = @_;
    return '' unless defined $value;
    return $value unless $value =~ /[",\r\n]/;
    (my $escaped = $value) =~ s/"/""/g;
    return qq{"$escaped"};
};

print join(',', qw(word count flag_level flag_text flag_for url)), "\n";
# Case-insensitive exact match; LIKE would treat % and _ in the bound value
# as wildcards.
my $p_word = $db->prepare('SELECT title, slug, flag_level, flag_text, flag_for FROM definitions WHERE title = ? COLLATE NOCASE');
foreach my $word (sort keys %words) {
    next if $words{$word}{NO};

    # When the words table supplied a ref, the definition is looked up by
    # that ref instead of by the word itself.
    my $title = $words{$word}{ref} ? $words{$word}{ref} : $word;
    $p_word->execute($title);
    my $row = $p_word->fetchrow_hashref();
    # Only the first row is used.
    $p_word->finish;

    unless ($row) {
        # Keep the word in the report, but with empty definition fields and
        # no URL rather than a link that lacks its slug.
        warn "No definition found for title '$title' (word '$word')\n";
        $row = {};
    }
    my $url = defined $row->{slug} ? "${URL_BASE}/$row->{slug}" : '';

    print join(',', map { $csv_field->($_) } (
        lc($word),
        ($words{$word}{count} || 0),
        $row->{flag_level},
        $row->{flag_text},
        $row->{flag_for},
        $url,
    )), "\n";
}

1
checker/check.sh Executable file
View File

@ -0,0 +1 @@
#!/bin/sh
# Usage: check.sh URL
# Passes URL to `elinks -dump` and pipes its output into check.pl.
# Aborts with "Need URL" when no argument is given.  check.pl is resolved
# relative to the current working directory.
elinks -dump "${1:?Need URL}" | perl check.pl

91
checker/defs2db.pl Normal file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env perl
# Loads the front matter of the definition files named on the command line
# into the defs.db SQLite database.
use warnings;
use strict;
# Set to a true value to force debug output from _debug(); a true DEBUG
# environment variable has the same effect.
my $DEBUG = 0;
# Marker that getFrontMatter() expects at the very start of a file and again
# at the end of the front matter.
my $SEPERATOR = '---';
use File::Slurp qw(read_file);
use YAML::Any qw(LoadFile Load);
use DBI;
use JSON;
use Data::Dumper;
# _debug($format, @args)
#
# printf-style debug helper.  Prints "DEBUG <formatted message>" followed by
# a newline to the currently selected output handle, but only when the DEBUG
# environment variable or the file-level $DEBUG flag is true.
sub _debug($;@) {
    my ($str, @args) = @_;
    my $enabled = $ENV{DEBUG} || $DEBUG;
    return if !$enabled;
    printf "DEBUG $str\n", @args;
}
# getFrontMatter($str)
#
# Extract the front matter block from the text of a definition file.
#
# $str must begin with $SEPERATOR followed by a single character (normally
# the newline that ends the separator line).  The front matter is everything
# after that character up to, but not including, the next line that starts
# with $SEPERATOR; the newline before that closing line is part of the
# returned text.
#
# Returns the front matter string, or undef (after a warning) when the
# opening or the closing separator cannot be found.
sub getFrontMatter($) {
    my $str = shift;
    unless (defined $str && index($str, $SEPERATOR) == 0) {
        warn "Initial separator not found";
        # Explicit undef keeps the one-element result that callers have
        # always received from this branch.
        return undef;
    }

    # First character after the opening separator and its line terminator.
    my $start = length($SEPERATOR) + 1;

    # The closing separator has to start a line; a separator that merely
    # appears inside a value must not end the front matter early.  The search
    # begins at the character that terminates the opening separator so that
    # an empty front matter block is still recognised.
    my $closing_newline = index($str, "\n$SEPERATOR", length($SEPERATOR));
    if ($closing_newline < 0) {
        # Without this guard index()'s -1 would turn into a negative substr
        # length and yield the wrong slice of the file.
        warn "Closing separator not found";
        return undef;
    }

    return substr($str, $start, $closing_newline + 1 - $start);
}
# NOTE(review): %fields is not referenced anywhere in the visible code.
my %fields;
# Open defs.db (relative to the current working directory).  RaiseError makes
# every later DBI failure fatal.
my $db = DBI->connect("DBI:SQLite:dbname=defs.db", '', '', { RaiseError => 1 })
or die $DBI::errstr;
# One prepared INSERT per target table; all of them are executed once or
# more for every imported file.
# definitions: one row per file, including the flattened flag fields.
my $p_def = $db->prepare("
INSERT INTO definitions
(title, slug, defined, speech, skip_in_table_of_content, flag_level, flag_text, flag_for)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
);
# my $p_flag = $db->prepare("INSERT INTO flags (title, level, text, for) VALUES (?, ?, ?, ?)");
# readings / alt_words / sub_terms: one row per list element, keyed by title.
my $p_readings = $db->prepare("INSERT INTO readings (title, text, href) VALUES (?, ?, ?)");
my $p_alt_words = $db->prepare("INSERT INTO alt_words (title, alt_word) VALUES (?, ?)");
my $p_sub_terms = $db->prepare("INSERT INTO sub_terms (title, text, full_title) VALUES (?, ?, ?)");
# data: the raw front matter text plus its JSON encoding.
my $p_data = $db->prepare("INSERT INTO data (title, yaml, json) VALUES (?, ?, ?)");
# Import every definition file named on the command line: parse its YAML
# front matter and insert it into the definitions, readings, alt_words,
# sub_terms and data tables.  Files without usable front matter are skipped
# with a warning.
#
# Iterating over @ARGV (instead of shifting in a while condition) keeps the
# loop from stopping early at a file whose name is false, such as "0".
foreach my $input (@ARGV) {
    _debug("Parsing %s", $input);
    my $input_data = read_file($input);
    # Normalise CRLF line endings before looking for the separators.
    $input_data =~ s/\r//g;

    my $fm_str = getFrontMatter($input_data);
    unless (defined $fm_str) {
        warn "Skipping $input: no front matter found\n";
        next;
    }

    # Only the first YAML document is used.
    my ($fm) = Load($fm_str);
    unless (ref $fm eq 'HASH') {
        warn "Skipping $input: front matter is not a YAML mapping\n";
        next;
    }

    # Read the optional sections through locals with empty defaults.
    # Dereferencing $fm->{flag}->{...} or looping over @{$fm->{reading}}
    # directly would autovivify those keys inside $fm, and the empty
    # containers would then show up in the JSON stored at the end.
    my $flag      = $fm->{flag}      || {};
    my $readings  = $fm->{reading}   || [];
    my $alt_words = $fm->{alt_words} || [];
    my $sub_terms = $fm->{sub_terms} || [];

    $p_def->execute(
        $fm->{title},
        $fm->{slug},
        ($fm->{defined} or 0),
        ($fm->{speech} or 'unknown'),
        ($fm->{skip_in_table_of_content} or '0'),
        $flag->{level},
        $flag->{text},
        $flag->{for},
    );

    foreach my $reading (@$readings) {
        $p_readings->execute(
            $fm->{title},
            $reading->{text},
            $reading->{href},
        );
    }

    foreach my $alt_word (@$alt_words) {
        $p_alt_words->execute(
            $fm->{title},
            $alt_word,
        );
    }

    foreach my $sub_term (@$sub_terms) {
        $p_sub_terms->execute(
            $fm->{title},
            $sub_term->{text},
            $sub_term->{full_title},
        );
    }

    # Keep the raw front matter and its JSON form alongside the parsed rows.
    $p_data->execute($fm->{title}, $fm_str, encode_json($fm));
}
$db->disconnect;