From 8f07a10c7f0775fd255357cf1744e5594ef8cf53 Mon Sep 17 00:00:00 2001
From: Matthew Slowe
Date: Sat, 30 May 2020 15:25:18 +0100
Subject: [PATCH] proof of concept checker

---
 checker/Dockerfile |   2 +
 checker/check.pl   | 115 +++++++++++++++++++++++++++++++++++++
 checker/check.sh   |   6 ++
 checker/defs2db.pl | 140 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 263 insertions(+)
 create mode 100644 checker/Dockerfile
 create mode 100755 checker/check.pl
 create mode 100755 checker/check.sh
 create mode 100644 checker/defs2db.pl

diff --git a/checker/Dockerfile b/checker/Dockerfile
new file mode 100644
index 00000000..54d995a3
--- /dev/null
+++ b/checker/Dockerfile
@@ -0,0 +1,2 @@
+FROM alpine
+RUN apk add perl perl-dbd-sqlite perl-file-slurp perl-yaml-libyaml perl-json
diff --git a/checker/check.pl b/checker/check.pl
new file mode 100755
--- /dev/null
+++ b/checker/check.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/env perl
+#
+# check.pl - look up every whitespace-separated token of the input (STDIN or
+# files named on the command line) in defs.db and print a CSV report of the
+# tokens that were found.
+#
+# If the PROGRESS environment variable is true, one character per token is
+# written to STDERR: '+' first hit for a word, '=' repeat of a known word,
+# '.' token not found in the database.
+
+use warnings;
+use strict;
+use Data::Dumper;
+
+my $DEBUG = 0;
+my $URL_BASE = 'https://www.selfdefined.app/definitions';
+
+use DBI;
+my $db = DBI->connect('DBI:SQLite:dbname=defs.db', '', '', { RaiseError => 1 })
+    or die $DBI::errstr;
+
+# like_literal($text)
+#   Return $text as a LIKE pattern that matches it literally: '%', '_' and the
+#   escape character '!' are prefixed with '!'.  Tokens come from arbitrary
+#   text, and an unescaped '%' would otherwise match every row.  Must be used
+#   together with "ESCAPE '!'" in the SQL.
+sub like_literal {
+    my ($text) = @_;
+    (my $pattern = $text) =~ s/([!%_])/!$1/g;
+    return $pattern;
+}
+
+my $p_lookup = $db->prepare(q{SELECT word, ref FROM words WHERE word LIKE ? ESCAPE '!'});
+
+# lc(token) => { count => occurrences, ref => words.ref of the hit, NO => set
+# when the token is not in the database }
+my %words;
+
+# lookup($token)
+#   Record one occurrence of $token in %words.  The database is queried only
+#   the first time a (lower-cased) token is seen.  Returns nothing.
+sub lookup {
+    my ($token) = @_;
+    my $key = lc $token;
+
+    if (my $seen = $words{$key}) {
+        $seen->{count}++;
+        print STDERR $seen->{NO} ? '.' : '=' if $ENV{PROGRESS};
+        return;
+    }
+
+    # selectrow_hashref fetches at most one row and finishes the statement, so
+    # the handle is not left active between calls.
+    my $row = $db->selectrow_hashref($p_lookup, undef, like_literal($token));
+    if ($row) {
+        $words{$key} = { count => 1, ref => $row->{ref} };
+        print STDERR '+' if $ENV{PROGRESS};
+        return;
+    }
+
+    $words{$key}{NO}++;
+    print STDERR '.' if $ENV{PROGRESS};
+    return;
+}
+
+# csv_field($value)
+#   Return $value as one CSV field: undef becomes the empty string, and a value
+#   containing a comma, double quote, CR or LF is wrapped in double quotes with
+#   embedded double quotes doubled.
+sub csv_field {
+    my ($value) = @_;
+    return '' unless defined $value;
+    return $value unless $value =~ /[",\r\n]/;
+    (my $quoted = $value) =~ s/"/""/g;
+    return qq{"$quoted"};
+}
+
+# Process input.  split ' ' (unlike /\s+/) does not produce an empty leading
+# token for lines that start with whitespace.
+# NOTE(review): punctuation attached to a token ("word,") is not stripped, so
+# such a token only matches if the database lists it in that form.
+while (my $line = <>) {
+    foreach my $token (split ' ', $line) {
+        lookup($token);
+    }
+}
+
+# Report: one CSV row per word that was found, sorted so that the output is
+# reproducible between runs.
+print join(',', qw(word count flag_level flag_text flag_for url)), "\n";
+my $p_word = $db->prepare(q{SELECT title, slug, flag_level, flag_text, flag_for FROM definitions WHERE title LIKE ? ESCAPE '!'});
+foreach my $word (sort keys %words) {
+    my $entry = $words{$word};
+    next if $entry->{NO};
+
+    # Search by words.ref when it is set, otherwise by the word itself.
+    my $title = $entry->{ref} || $word;
+    my $row = $db->selectrow_hashref($p_word, undef, like_literal($title));
+    unless ($row) {
+        warn "No definition found for '$word'\n";
+        $row = {};
+    }
+
+    my $url = defined $row->{slug} ? "${URL_BASE}/$row->{slug}" : '';
+    print join(',', map { csv_field($_) }
+        $word,
+        $entry->{count},
+        $row->{flag_level},
+        $row->{flag_text},
+        $row->{flag_for},
+        $url,
+    ), "\n";
+}
+
+$db->disconnect;
diff --git a/checker/check.sh b/checker/check.sh
new file mode 100755
--- /dev/null
+++ b/checker/check.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Usage: check.sh URL
+#
+# Dump the page at URL as text with elinks and pipe it through check.pl.
+# check.pl is resolved relative to the current directory.
+elinks -dump "${1:?Need URL}" | perl check.pl
diff --git a/checker/defs2db.pl b/checker/defs2db.pl
new file mode 100644
--- /dev/null
+++ b/checker/defs2db.pl
@@ -0,0 +1,140 @@
+#!/usr/bin/env perl
+#
+# defs2db.pl - read the YAML front matter of every definition file named on
+# the command line and insert it into the SQLite database defs.db (tables
+# definitions, readings, alt_words, sub_terms and data).  This script only
+# INSERTs; it does not create the tables.
+#
+# Set the DEBUG environment variable to a true value for trace output.
+
+use warnings;
+use strict;
+
+my $DEBUG = 0;
+my $SEPARATOR = '---';
+
+use File::Slurp qw(read_file);
+# NOTE(review): the Dockerfile installs perl-yaml-libyaml; confirm that the
+# image also provides YAML::Any.
+use YAML::Any qw(LoadFile Load);
+use DBI;
+use JSON;
+use Data::Dumper;
+
+# _debug($format, @params)
+#   printf-style trace line on STDOUT, printed only when $DEBUG or the DEBUG
+#   environment variable is true.
+sub _debug {
+    my ($format, @params) = @_;
+    return unless $ENV{DEBUG} or $DEBUG;
+    printf "DEBUG $format\n", @params;
+    return;
+}
+
+# getFrontMatter($text)
+#   Return the text between the "---" that opens $text and the next line that
+#   starts with "---".  Warns and returns nothing (undef in scalar context)
+#   when $text does not start with the separator or has no closing separator.
+sub getFrontMatter {
+    my ($text) = @_;
+
+    unless (defined $text and index($text, $SEPARATOR) == 0) {
+        warn "Initial separator not found";
+        return;
+    }
+
+    # Only accept the closing separator at the start of a line, so a "---"
+    # inside a value does not cut the front matter short.
+    my $closing = index($text, "\n$SEPARATOR", length($SEPARATOR));
+    if ($closing < 0) {
+        # index() returns -1 here; passing that on to substr() would silently
+        # return a truncated string.
+        warn "Closing separator not found";
+        return;
+    }
+
+    my $start = length($SEPARATOR) + 1;    # skip the separator and its newline
+    return substr($text, $start, $closing + 1 - $start);
+}
+
+my $db = DBI->connect("DBI:SQLite:dbname=defs.db", '', '', { RaiseError => 1 })
+    or die $DBI::errstr;
+
+my $p_def = $db->prepare("
+    INSERT INTO definitions
+        (title, slug, defined, speech, skip_in_table_of_content, flag_level, flag_text, flag_for)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
+    );
+# my $p_flag = $db->prepare("INSERT INTO flags (title, level, text, for) VALUES (?, ?, ?, ?)");
+my $p_readings = $db->prepare("INSERT INTO readings (title, text, href) VALUES (?, ?, ?)");
+my $p_alt_words = $db->prepare("INSERT INTO alt_words (title, alt_word) VALUES (?, ?)");
+my $p_sub_terms = $db->prepare("INSERT INTO sub_terms (title, text, full_title) VALUES (?, ?, ?)");
+my $p_data = $db->prepare("INSERT INTO data (title, yaml, json) VALUES (?, ?, ?)");
+
+# Import everything in one transaction: a run that dies part-way (RaiseError)
+# then leaves no partial import behind, and SQLite does not have to commit
+# after every single INSERT.
+$db->begin_work;
+
+# foreach instead of "while (shift @ARGV)": a file named "0" must not end the loop.
+foreach my $input (@ARGV) {
+    _debug("Parsing %s", $input);
+    my $input_data = read_file($input);
+    $input_data =~ s/\r//g;
+
+    my $fm_str = getFrontMatter($input_data);
+    unless (defined $fm_str) {
+        warn "Skipping $input: no front matter\n";
+        next;
+    }
+
+    my ($fm) = Load($fm_str);
+    unless (ref($fm) eq 'HASH') {
+        warn "Skipping $input: front matter is not a YAML mapping\n";
+        next;
+    }
+
+    # Read optional keys through fallbacks.  Dereferencing a missing key
+    # directly would autovivify it inside $fm, and the empty "flag" /
+    # "reading" / ... entries would end up in the JSON stored below.
+    my $flag = $fm->{flag} || {};
+
+    $p_def->execute(
+        $fm->{title},
+        $fm->{slug},
+        ($fm->{defined} or 0),
+        ($fm->{speech} or 'unknown'),
+        ($fm->{skip_in_table_of_content} or '0'),
+        $flag->{level},
+        $flag->{text},
+        $flag->{for},
+    );
+
+    foreach my $reading (@{ $fm->{reading} || [] }) {
+        $p_readings->execute(
+            $fm->{title},
+            $reading->{text},
+            $reading->{href},
+        );
+    }
+
+    foreach my $alt_word (@{ $fm->{alt_words} || [] }) {
+        $p_alt_words->execute(
+            $fm->{title},
+            $alt_word,
+        );
+    }
+
+    foreach my $sub_term (@{ $fm->{sub_terms} || [] }) {
+        $p_sub_terms->execute(
+            $fm->{title},
+            $sub_term->{text},
+            $sub_term->{full_title},
+        );
+    }
+
+    $p_data->execute($fm->{title}, $fm_str, encode_json($fm));
+}
+
+$db->commit;
+$db->disconnect;