commit 7965c415164b017261856102198cfd8e0a206097 Author: Matthew Slowe Date: Sun May 31 10:06:14 2020 +0100 working poc diff --git a/README.md b/README.md new file mode 100644 index 0000000..391eca8 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +Self-defined checker +==================== + +Proof-of-concept language checker using the [Self-defined project](https://www.selfdefined.app/). + +Requirements +------------ + +* J2EE compatible web server (eg. Tomcat) + +Preparation +----------- + +1. Clone the repository +2. Get the dependencies and ensure they're either in `WEB-INF/lib` or the webserver's shared `lib`: + * SQLite JDBC driver (eg. https://bitbucket.org/xerial/sqlite-jdbc/downloads/sqlite-jdbc-3.30.1.jar) + * JSON library (eg. http://stleary.github.io/JSON-java/index.html) +3. Compile the code (YMMV) + ```shell + cd WEB-INF + export LIBPATH=/path/to/shared/java/libraries + javac -d classes -classpath src:classes:lib/json-20200518.jar:$LIBPATH/el-api.jar:$LIBPATH/servlet-api.jar:$LIBPATH/sqlite-jdbc.jar src/uk/org/mafoo/selfdefined/Checker.java + ``` +4. Build a war file + ```shell + jar -cvf self-defined.war . + ``` +5. Deploy! (move the war file to your webserver's `/webapps` folder and wait for it to deploy) + +Notes +----- + +The included database is built using the data from https://github.com/tatianamac/selfdefined. 
diff --git a/WEB-INF/classes/.keep b/WEB-INF/classes/.keep new file mode 100644 index 0000000..e69de29 diff --git a/WEB-INF/files/defs.db b/WEB-INF/files/defs.db new file mode 100644 index 0000000..f6bc49f Binary files /dev/null and b/WEB-INF/files/defs.db differ diff --git a/WEB-INF/lib/.keep b/WEB-INF/lib/.keep new file mode 100644 index 0000000..e69de29 diff --git a/WEB-INF/src/uk/org/mafoo/selfdefined/Checker.java b/WEB-INF/src/uk/org/mafoo/selfdefined/Checker.java new file mode 100644 index 0000000..97009b9 --- /dev/null +++ b/WEB-INF/src/uk/org/mafoo/selfdefined/Checker.java @@ -0,0 +1,176 @@ +package uk.org.mafoo.selfdefined; + +import javax.servlet.annotation.WebServlet; +import javax.management.RuntimeErrorException; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import java.io.*; +import java.sql.*; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; + +import org.sqlite.*; + +@WebServlet("/Checker") +public class Checker extends HttpServlet { + + Connection db; + PreparedStatement p_checkWord, p_getWord; + String urlBase = "https://www.selfdefined.app/definitions/"; + + public void init() throws ServletException { + // Do required initialization + try { + db = DriverManager.getConnection( + "jdbc:sqlite:" + getServletContext().getRealPath("/WEB-INF/files/defs.db")); + + p_checkWord = db.prepareStatement("SELECT word, ref FROM words WHERE word LIKE ?"); + p_getWord = db.prepareStatement("SELECT title, slug, flag_level, flag_text, flag_for FROM definitions WHERE title LIKE ?"); + } catch (SQLException e) { + throw new RuntimeException("Failed to load database: " + e.toString()); + } + } + + private HashMap getWord(String word) { + + try { + p_getWord.setString(1, word); + ResultSet rs = p_getWord.executeQuery(); + 
if(rs.next()) { + HashMap _return = new HashMap(); + _return.put("word", rs.getString(1)); + _return.put("slug", rs.getString(2)); + _return.put("flagLevel", rs.getString(3)); + _return.put("flagText", rs.getString(4)); + _return.put("flagFor", rs.getString(5)); + return _return; + } else { + return null; + } + } catch (SQLException e) { + return null; + } + } + + private void checkWord(String word, HashMap wordMap, HashMap wordCounts) { + assert(p_checkWord != null); + assert(word != null); + + wordCounts.put(word, wordCounts.getOrDefault(word, 0) + 1); + + // Do we already know the answer? + if(wordMap.containsKey(word)) { + // If so, just increment the wordcount and return + return; + } + + try { + p_checkWord.setString(1, word); + ResultSet rs = p_checkWord.executeQuery(); + while(rs.next()) { + if(rs.getString(1).equalsIgnoreCase(rs.getString(2))) { + wordMap.put(rs.getString(1), null); + } else { + wordMap.put(rs.getString(1), rs.getString(2)); + } + } + } catch (SQLException e) { + // FIXME + } + } + + private Report process(String[] input) { + HashMap wordMap = new HashMap(); + HashMap wordCounts = new HashMap(); + + for(String line : input) { + for(String word : line.split("\\s+")) { + checkWord(word, wordMap, wordCounts); + } + } + + Report report = new Report(); + for (String word : wordMap.keySet()) { + String ref = wordMap.get(word); + if(ref == null) ref = word; + HashMap canonicalWord = getWord(ref); + report.add( + canonicalWord.get("word"), + wordCounts.get(word), + canonicalWord.get("flagLevel"), + canonicalWord.get("flagText"), + canonicalWord.get("flagFor"), + urlBase.concat(canonicalWord.get("slug")) + ); + } + + return report; + } + + public void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + String[] vals = req.getParameterValues("input"); + if(vals != null) { + Report r = process(vals); + PrintWriter out = resp.getWriter(); + + ArrayList acceptHeaders = new ArrayList(); + for(Enumeration 
acceptHeaders_e = req.getHeaders("Accept"); acceptHeaders_e.hasMoreElements(); ) { + acceptHeaders.add(acceptHeaders_e.nextElement()); + } + if(acceptHeaders.contains("text/json")) { + resp.setContentType("text/json"); + out.println(r.toJSONObject()); + } else { + out.println(""); + out.println(""); + out.println(""); + out.println(" Language Checker"); + out.println(" "); + out.println(" "); + out.println(""); + out.println(""); + out.println("

Language checker results

"); + out.println(r.toHTML()); + out.println(""); + } + + } else { + throw new RuntimeException("No data"); + } + } + + // public void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + + // ArrayList arr = new ArrayList(); + + // try { + // BufferedReader inputData = req.getReader(); + // String line; + // while((line = inputData.readLine()) != null) { + // arr.add(line); + // } + // } catch (IOException e) { + // // FIXME + // } + + // if(arr.size() > 0) { + // Report r = process((String[]) arr.toArray(new String[0])); + // PrintWriter out = resp.getWriter(); + // out.println(r.toJSONObject()); + // } else { + // throw new RuntimeException("No data"); + // } + + // } + + public void destroy() { + // Close the database + } + +} \ No newline at end of file diff --git a/WEB-INF/src/uk/org/mafoo/selfdefined/Report.java b/WEB-INF/src/uk/org/mafoo/selfdefined/Report.java new file mode 100644 index 0000000..1d98e60 --- /dev/null +++ b/WEB-INF/src/uk/org/mafoo/selfdefined/Report.java @@ -0,0 +1,62 @@ +package uk.org.mafoo.selfdefined; + +import java.util.ArrayList; +import org.json.*; + +class Report { + + ArrayList data; + + public Report() { + data = new ArrayList(); + } + + public void add(String word, Integer count, String flagLevel, String flagText, String flagFor, String url) { + data.add(new ReportWord(word, count, flagLevel, flagText, flagFor, url)); + } + + public String toString() { + StringBuilder s = new StringBuilder(); + s.append("["); + int n = 0; + for (ReportWord w : data) { + if(n++ > 0) { s.append(","); } + s.append(w); + } + s.append("]"); + + return s.toString(); + } + + public JSONObject toJSONObject() { + JSONArray arr = new JSONArray(); + for(ReportWord w : data) { + arr.put(w.toJSONObject()); + } + + JSONObject obj = new JSONObject(); + obj.put("results", arr); + return obj; + } + + public String toHTML() { + StringBuilder str = new StringBuilder(); + str.append("\n"); + str.append(" \n"); + 
str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + str.append(" \n"); + for(ReportWord w : data) { + str.append(w.toHTML()); + } + str.append(" \n"); + str.append("
WordCountFlag LevelFlag TextSee-AlsoURL
\n"); + + return str.toString(); + } +} \ No newline at end of file diff --git a/WEB-INF/src/uk/org/mafoo/selfdefined/ReportWord.java b/WEB-INF/src/uk/org/mafoo/selfdefined/ReportWord.java new file mode 100644 index 0000000..baad700 --- /dev/null +++ b/WEB-INF/src/uk/org/mafoo/selfdefined/ReportWord.java @@ -0,0 +1,54 @@ +package uk.org.mafoo.selfdefined; + +import org.json.JSONObject; + +class ReportWord { + String word; + Integer count; + String flagLevel; + String flagText; + String flagFor; + String url; + + public ReportWord(String word, Integer count, String flagLevel, String flagText, String flagFor, String url) { + this.word = word; + this.count = count; + this.flagLevel = flagLevel; + this.flagText = flagText; + this.flagFor = flagFor; + this.url = url; + } + + public String toString() { + return String.format( + "{word=%s,count=%d,flagLevel=%s,flagText=%s,flagFor=%s,url=%s}", + word, count, flagLevel, flagText, flagFor, url + ); + } + + public JSONObject toJSONObject() { + JSONObject jo = new JSONObject(); + jo.put("word", word); + jo.put("count", count); + jo.put("flagLevel", flagLevel); + jo.put("flagText", flagText); + jo.put("flagFor", flagFor); + jo.put("url", url); + + return jo; + } + + public String toHTML() { + StringBuilder str = new StringBuilder(); + str.append("\n"); + str.append(" "); str.append(word); str.append("\n"); + str.append(" "); str.append(count); str.append("\n"); + str.append(" "); str.append(flagLevel); str.append("\n"); + str.append(" "); str.append(flagText); str.append("\n"); + str.append(" "); str.append(flagFor); str.append("\n"); + str.append(" ref\n"); + str.append("\n"); + + return str.toString(); + } +} \ No newline at end of file diff --git a/WEB-INF/web.xml b/WEB-INF/web.xml new file mode 100644 index 0000000..c22acae --- /dev/null +++ b/WEB-INF/web.xml @@ -0,0 +1,21 @@ + + + + + S + S + + + CheckerServlet + uk.org.mafoo.selfdefined.Checker + + + + CheckerServlet + /c + + + \ No newline at end of file diff --git 
a/index.jsp b/index.jsp new file mode 100644 index 0000000..9c24daf --- /dev/null +++ b/index.jsp @@ -0,0 +1,22 @@ + + + + Language Checker + + + + +

Language checker

+

Language checker based on the Self Defined project

+
+ Self-Defined seeks to provide more inclusive, holistic, and fluid definitions to reflect the diverse perspectives of the modern world. + + With the foundation of vocabulary, we can begin to understand lived experiences of people different than us. Words can provide us with a sense of identity and allow us to find kinship through common experiences. +
+
+ + + +
+

You can also POST to the /Checker endpoint specifying Accept: text/json.

+ \ No newline at end of file