Skip to content

Commit

Permalink
Now uses grammar endpoint on api.greynir.is. Still synchronous and do…
Browse files Browse the repository at this point in the history
…es not display progress
  • Loading branch information
KariSt1 committed Sep 11, 2023
1 parent 9c7d637 commit f826164
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 22 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,7 @@ mypy.ini
.pyre/

test.json

# Resources folder for keys
resources/*
!resources/.gitkeep
Empty file added resources/.gitkeep
Empty file.
21 changes: 19 additions & 2 deletions routes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@
"""

from pathlib import Path
from typing import TYPE_CHECKING, Tuple, Dict, Any, Callable, Optional, Union, cast

import logging
import threading
import time
import uuid
import json
from functools import wraps
from functools import lru_cache, wraps
from datetime import datetime, timedelta

from flask import (
Expand All @@ -66,6 +68,8 @@

_TRUTHY = frozenset(("true", "1", "yes"))

_RESOURCES_DIR = Path(__file__).parent.parent.resolve() / "resources"

cache = current_app.config["CACHE"]
routes: Blueprint = Blueprint("routes", __name__)

Expand Down Expand Up @@ -120,6 +124,20 @@ def better_jsonify(**kwargs: Any) -> Response:
return resp


@lru_cache(maxsize=32)
def _read_key_file(path: Path) -> str:
    """
    Return the stripped contents of the key file at `path`. Cached.
    Raises FileNotFoundError if the file is missing; lru_cache does not
    memoize calls that raise, so a missing file is retried on the next call.
    """
    # Explicit encoding so the result does not depend on the server's locale
    return path.read_text(encoding="utf-8").strip()


def read_txt_api_key(key_name: str, *, folder: Path = _RESOURCES_DIR) -> str:
    """
    Read the given key from a text file in resources directory. Cached.
    Optionally provide a different path to the folder containing the key file.

    The key is expected in the file `<folder>/<key_name>.txt`; surrounding
    whitespace is stripped. If the file does not exist, a warning is logged
    and an empty string is returned. Only successful reads are cached, so a
    key file that is added while the server is running is picked up on the
    next call instead of requiring a restart.
    """
    p: Path = folder / f"{key_name}.txt"
    try:
        return _read_key_file(p)
    except FileNotFoundError:
        logging.warning("API key file %s not found in %s", p, folder)
        return ""


# Keep the cache-management hooks that the lru_cache-decorated version exposed
read_txt_api_key.cache_clear = _read_key_file.cache_clear  # type: ignore[attr-defined]
read_txt_api_key.cache_info = _read_key_file.cache_info  # type: ignore[attr-defined]


def text_from_request(
rq: Request, *, post_field: Optional[str] = None, get_field: Optional[str] = None
) -> str:
Expand Down Expand Up @@ -288,7 +306,6 @@ def async_task(f: Callable[[Any], Response]) -> Callable[[Any], Tuple[Any, ...]]

@wraps(f)
def wrapped(*args: Any, **kwargs: Any) -> Tuple[Any, ...]:

# Assign a unique id to each asynchronous task
task_id = uuid.uuid4().hex

Expand Down
51 changes: 48 additions & 3 deletions routes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import time
import threading
import json
import requests
import uuid
from datetime import datetime, timedelta
from functools import partial
Expand All @@ -71,7 +72,7 @@
from db import SessionContext
from db.models import Correction

from . import routes, better_jsonify, text_from_request
from . import routes, better_jsonify, text_from_request, read_txt_api_key


T = TypeVar("T")
Expand All @@ -91,6 +92,9 @@
# By default, use all available CPU cores except one
POOL_SIZE = int(os.environ.get("POOL_SIZE", multiprocessing.cpu_count() - 1))

_GREYNIR_SEQ_API_KEY_FILENAME = "GreynirSeqAPIKey"
_GREYNIR_SEQ_API_GRAMMAR_URL = "https://api.greynir.is/grammar/"


class RequestData:

Expand Down Expand Up @@ -277,6 +281,49 @@ def opts_from_request(rq: Request) -> Dict[str, Any]:
return d


@routes.route("/grammar", methods=["POST"])
@routes.route("/grammar/v<int:version>", methods=["POST"])
def correct_proxy(version: int = 1) -> Response:
    """Correct text provided by the user, i.e. not coming from an article.
    This can be either an uploaded file or a string. This is a proxy
    used to send the request to api.greynir.is.

    The incoming request is validated first; if validation fails, the error
    response produced by validate() is returned as-is. Otherwise the text and
    the option flags from the request are POSTed as JSON to the grammar
    endpoint, authenticated with the API key read from the resources folder.
    The upstream body and status code are passed back to the caller. If the
    API key is missing or the upstream request fails, a JSON response with
    valid=False and a reason is returned instead."""
    import logging  # Local import: only needed for the failure path below

    valid, result = validate(request, version)
    if not valid:
        assert isinstance(result, Response)
        return result
    assert isinstance(result, str)

    # Retrieve options flags from the request
    opts = opts_from_request(request)

    api_key = read_txt_api_key(_GREYNIR_SEQ_API_KEY_FILENAME)
    if not api_key:
        return better_jsonify(
            valid=False,
            reason="GreynirSeq API key not found",
        )

    # Build the payload so that the validated text always wins, even if the
    # options dict happens to contain a "text" entry of its own
    payload: Dict[str, Any] = {**opts, "text": result}

    try:
        # Send the request to api.greynir.is/grammar.
        # requests never times out by default, so without an explicit
        # (connect, read) timeout a stalled upstream would block this worker
        # indefinitely. The read timeout is generous because the call is
        # synchronous and long texts can take a while to process.
        upstream = requests.post(
            _GREYNIR_SEQ_API_GRAMMAR_URL,
            json=payload,
            headers={
                "Content-Type": "application/json;charset=utf-8",
                "X-API-Key": api_key,
            },
            timeout=(10, 120),
        )
    except requests.RequestException:
        # Connection errors, timeouts etc.: report them in the same JSON
        # shape as the other failure cases instead of raising a bare 500
        logging.exception("Request to %s failed", _GREYNIR_SEQ_API_GRAMMAR_URL)
        return better_jsonify(
            valid=False,
            reason="Error communicating with the GreynirSeq API",
        )

    # Relay the upstream answer (body and status code) to the client
    return Response(
        response=upstream.text,
        status=upstream.status_code,
        headers={"Content-Type": "application/json; charset=utf-8"},
    )


@routes.route("/correct.task", methods=["POST"])
@routes.route("/correct.task/v<int:version>", methods=["POST"])
def correct_async(version: int = 1) -> Response:
Expand Down Expand Up @@ -343,7 +390,6 @@ def validate(request: Request, version: int) -> Tuple[bool, Union[str, Response]

file = request.files.get("file")
if file is not None:

# Handle uploaded file
# file is a proxy object that emulates a Werkzeug FileStorage object
mimetype = file.mimetype
Expand All @@ -361,7 +407,6 @@ def validate(request: Request, version: int) -> Tuple[bool, Union[str, Response]
return False, better_jsonify(valid=False, reason="Error reading file")

else:

# Handle POSTed form data, JSON, or plain text string
try:
text = text_from_request(request)
Expand Down
79 changes: 62 additions & 17 deletions templates/correct.html
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,56 @@ <h3>Tölfræði</h3>
// Note: the following should not be a serverJsonQuery call,
// this is done via a HTTP form post
// Endpoints with .task or .api suffix are not cached
serverQuery('/correct.task',
serverQuery( "/grammar",
{
text: txt
text: txt,
"options": {
"annotate_unparsed_sentences": true,
"suppress_suggestions": false,
"ignore_words": [],
"ignore_rules": [ "ASLBRODDVANTAR",
"ASLSTAFVANTAR",
"ASLVITLSTAF",
"ASLVIXL",
"BEYGVILLA",
"C005_w",
"C005",
"EY4EI",
"P_DIR_LOC",
"P_DOUBLE_DEFINITE",
"P_MOOD_ACK",
"P_MOOD_COND",
"P_MOOD_PURP",
"P_MOOD_REL",
"P_MOOD_TEMP_w",
"P_NT_Einkunn",
"P_NT_EndingIR",
"P_NT_FsMeðFallstjórn",
"P_NT_ÍTölu",
"P_NT_Manns",
"P_NT_Né",
"P_NT_Sem_w",
"P_NT_ÞóAð",
"P_SINGSUB_GEN",
"P_VeraAð",
"P_wrong_case",
"P_WRONG_PLACE_PP",
"P_WRONG_PREP_AÐ",
"RFTGR",
"S001",
"W001_w",
"Y001_w",
"Ý4Í",
"P_WRONG_CASE_nf_þf",
"P_WRONG_CASE_nf_þgf",
"P_WRONG_CASE_þgf_nf",
"P_WRONG_PLACE_PP",
],
"custom": ""
},
},
this.start.bind(this),
null,
this.populateResult.bind(this),
this.handleError.bind(this)
);
};
Expand All @@ -176,10 +220,11 @@ <h3>Tölfræði</h3>
this.updateProgress();
// Send off ajax request
$.ajax({
url: '/correct.task',
url: '/grammar',
type: 'POST',
data: fd,
success: this.start.bind(this),
success: null,
complete: this.populateResult.bind(this),
error: this.handleError.bind(this),
cache: false,
contentType: false,
Expand All @@ -200,7 +245,7 @@ <h3>Tölfræði</h3>
CorrectionTask.prototype.start = function (json, status, resp) {
// Called when the task has been started and we're being informed
// of the status (polling) URL to use
if (resp.status == 202) {
if (resp.status == 200) {
// This is the expected status: obtain the status URL
// from the Location header
this.url = resp.getResponseHeader("Location");
Expand Down Expand Up @@ -243,7 +288,7 @@ <h3>Tölfræði</h3>

CorrectionTask.prototype.checkProgress = function (json, status, resp) {
// Response from the server to a progress check
if (resp.status == 202) {
if (resp.status == 200) {
// Still in progress
this.progress = json.progress;
this.updateProgress();
Expand Down Expand Up @@ -305,20 +350,20 @@ <h3>Tölfræði</h3>
}
};

CorrectionTask.prototype.populateResult = function (json, status, resp) {
   // Processing completed: display the result.
   // The first argument is the object passed to the 'complete' callback;
   // the parsed JSON payload is read from its responseJSON property.
   this.wait(false);
   // Fetch the payload defensively: responseJSON is missing when the
   // server did not answer with parseable JSON, and reading .valid from
   // it before checking would throw instead of showing the error message
   var data = (json === undefined || json === null) ? null : json.responseJSON;
   if (data === undefined || data === null || !data.valid) {
      showError("<b>Villa kom upp</b> í samskiptum við netþjón Greynis");
   }
   else {
      // Display the paragraphs, sentences and tokens
      // along with the sentence-level annotations
      displayAnnotations(data.paragraphs);
      populateStats(data.stats);
      // Replace text in text field
      if (data.text) {
         $("#txt").val(data.text);
      }
   }
};
Expand Down Expand Up @@ -445,6 +490,7 @@ <h3>Tölfræði</h3>
annSpan = []; // Array of annotation token spans
var unknownWords = {}; // Dict of already seen unknown words
$.each(j, function (pix, p) {
p_sentences = p.sentences;
// We create a div for each paragraph
x += "<div class='correct'>\n";
// Left column: paragraph text
Expand All @@ -453,7 +499,7 @@ <h3>Tölfræði</h3>
var right = "";
// Third child: SVG drawing stuff
var canvas = "";
$.each(p, function (sentence_index, s) {
$.each(p_sentences, function (sentence_index, s) {
// Sentence s
const parsed = !s.annotations || !s.annotations.some((a) => a.code === "E001");
lastSp = TP_NONE;
Expand All @@ -468,9 +514,9 @@ <h3>Tölfræði</h3>
if (TP_SPACE[lastSp - 1][thisSp - 1] && tix)
left += " ";
lastSp = thisSp;
var tx = t.x;
var tx = t.text;
var cls = [];
if (t.k === TOK_PUNCTUATION) {
if (t.kind === TOK_PUNCTUATION) {
// Add space around em-dash
if (tx === "—")
tx = " — ";
Expand Down Expand Up @@ -523,7 +569,6 @@ <h3>Tölfræði</h3>
$.each(s.annotations, function (aix, a) {
// Annotation a
// We do not show repeated annotations for unknown words
console.log(a.code)
a.divId = undefined;
if (a.code !== "U001" || unknownWords[a.text] === undefined) {
var cls = isWarning(a.code) ? " warning" : "";
Expand Down

0 comments on commit f826164

Please sign in to comment.