staticExercisesAPI.py 7.98 KB
Newer Older
1 2 3 4 5 6 7 8 9
import json
import os
import re
import string
from datetime import datetime
from decimal import Decimal, ROUND_HALF_UP
from io import BytesIO
from tempfile import mkstemp
from time import time
10
from typing import Dict, List, Set, Match, Tuple, Union
11 12
from zipfile import ZipFile

13
import connexion
14
import requests
15
from connexion.lifecycle import ConnexionResponse
16 17 18 19 20 21 22
from requests import Response

from mcserver.app.models import StaticExercise
from mcserver.app.services import NetworkService, AnnotationService
from mcserver.config import Config


23 24 25 26 27 28 29
def get() -> Union[Response, ConnexionResponse]:
    """ Handles GET requests for the StaticExercises REST API.

    If the cached exercises are older than Config.INTERVAL_STATIC_EXERCISES or the cache is empty, the result of
    update_exercises() is returned. Otherwise the cached exercises are serialized: each key of
    NetworkService.exercises is mapped to the to_dict() form of its exercise. """
    # TODO: WRITE AND READ LAST UPDATE TIME FROM THE DATABASE
    oldest_valid_update: datetime = datetime.fromtimestamp(time() - Config.INTERVAL_STATIC_EXERCISES)
    cache_outdated: bool = oldest_valid_update > NetworkService.exercises_last_update
    if cache_outdated or not NetworkService.exercises:
        return update_exercises()
    serialized: dict = {key: exercise.to_dict() for key, exercise in NetworkService.exercises.items()}
    return NetworkService.make_json_response(serialized)
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116


def get_relevant_strings(response: Response):
    """ Extracts from the exercises all inflected Latin words that serve as solutions.

    :param response: response whose content is a zip archive; only JSON files that are located directly inside
        a "content" directory (which itself has a parent directory) are evaluated.
    :return: dictionary mapping the frontend URL of each exercise to the set of strings found for it.
    """
    relevant_strings_dict: Dict[str, Set[str]] = {}
    # file numbers that are treated differently (fill blanks) or skipped entirely (multi choice)
    fill_blanks_black_list: Set[str] = {"3", "5", "8", "10"}
    multi_choice_black_list: Set[str] = {"19", "20", "21", "22", "23", "24"}
    with ZipFile(BytesIO(response.content)) as zip_file:
        for name in zip_file.namelist():
            name_parts: List[str] = name.split("/")
            # expected layout: .../<exercise type>/content/<file>.json; shallower paths cannot be exercises and
            # would otherwise cause an IndexError when the path parts are accessed
            if len(name_parts) < 3 or not name.endswith(".json") or name_parts[-2] != "content":
                continue
            # the part of the file name before the first underscore identifies the exercise
            file_name: str = name_parts[-1].split("_")[0]
            exercise_type: str = name_parts[-3]
            url: str = Config.PUBLIC_FRONTEND_URL + "exercise?type=" + exercise_type + "&file=" + file_name
            content: dict = json.loads(zip_file.read(name).decode("utf-8"))
            solutions: Set[str] = relevant_strings_dict.setdefault(url, set())
            if Config.H5P_DRAG_TEXT in name:
                text_field_content: str = content["textField"]
                asterisks: List[int] = [pos for pos, char in enumerate(text_field_content) if char == "*"]
                # solutions are enclosed in pairs of asterisks; pairing via zip ignores an unpaired trailing
                # asterisk (round(len / 2) rounds 1.5 up to 2 and then reads past the end of the list)
                for start, end in zip(asterisks[::2], asterisks[1::2]):
                    solution_text: str = text_field_content[(start + 1):end]
                    # only the part before the first colon is used
                    solutions.update(solution_text.split(":")[0].strip().split())
            elif Config.H5P_FILL_BLANKS in name:
                handle_fill_blanks(content, file_name, fill_blanks_black_list, url, relevant_strings_dict)
            elif Config.H5P_MULTI_CHOICE in name and file_name not in multi_choice_black_list:
                handle_multi_choice(content, url, relevant_strings_dict, file_name)
            elif Config.H5P_VOC_LIST in name:
                handle_voc_list(content, url, relevant_strings_dict)
    return relevant_strings_dict


def handle_fill_blanks(content: dict, file_name: str, fill_blanks_black_list: Set[str], url: str,
                       relevant_strings_dict: Dict[str, Set[str]]):
    """ Extracts from a fill_blanks exercises all inflected Latin words that serve as solutions.

    Solutions are enclosed in pairs of asterisks inside each question. Some files deviate from that:
    files "1" and "6" only use the first pair, file "4" only the first two pairs, and the second question of
    file "13" uses double quotes as delimiters. For files in the black list, the word is not taken from
    between the delimiters but from the question text itself (index 3 up to the first space).

    :param content: parsed exercise JSON; its "questions" entry is a list of strings.
    :param file_name: number of the exercise file, as a string.
    :param fill_blanks_black_list: file numbers whose solutions are read from the question text.
    :param url: key in relevant_strings_dict under which the words are collected (must already exist).
    :param relevant_strings_dict: result dictionary, modified in place.
    """
    questions: List[str] = content["questions"]
    for index, question in enumerate(questions):
        delimiters: List[int] = [pos for pos, char in enumerate(question) if char == "*"]
        if file_name in {"1", "6"}:
            delimiters = delimiters[:2]
        elif file_name == "4":
            delimiters = delimiters[:4]
        elif file_name == "13" and index == 1:
            delimiters = [pos for pos, char in enumerate(question) if char == '"']
        # pair up opening and closing delimiters; zip drops an unpaired trailing delimiter, whereas
        # round(len / 2) rounds 1.5 up to 2 and then fails with an IndexError
        for start, end in zip(delimiters[::2], delimiters[1::2]):
            # only the part before the first colon is used
            target: str = question[(start + 1):end].split(":")[0].strip()
            if file_name in fill_blanks_black_list:
                target = question[3:question.find(" ")]
            # alternatives are separated by slashes; within each, anything after the first comma is ignored
            for alternative in target.split("/"):
                for word in alternative.split(",")[0].strip().split():
                    relevant_strings_dict[url].add(word)


def handle_multi_choice(content: dict, url: str, relevant_strings_dict: Dict[str, Set[str]], file_name: str):
    """ Collects the solution words of a multi_choice exercise into relevant_strings_dict[url].

    The sources are tried in this order: the emphasized (<em>) part of the question, split into words after
    removing punctuation; the first bold (<b>) part of the question, taken as a whole (not for files "9" and
    "18"); the texts of all answers marked as correct, without their <div> tags. """
    question_text: str = content["question"]
    emphasized: Match = re.search(r"<em>(.*)</em>", question_text)
    if emphasized:
        without_punctuation: str = emphasized.group(1).translate(str.maketrans("", "", string.punctuation))
        for solution in without_punctuation.split():
            relevant_strings_dict[url].add(solution)
        return
    bold: Match = re.search(r"<b>(.*?)</b>", question_text)
    if bold and file_name not in {"9", "18"}:
        relevant_strings_dict[url].add(bold.group(1))
        return
    # fall back to the answers themselves
    correct_answers: List[dict] = [answer for answer in content["answers"] if answer["correct"]]
    for answer in correct_answers:
        cleaned: str = answer["text"].replace("<div>", "").replace("</div>", "")
        relevant_strings_dict[url].add(cleaned.strip())


def handle_voc_list(content: dict, url: str, relevant_strings_dict: Dict[str, Set[str]]):
    """ Extracts from a voc_list exercises all inflected Latin words that serve as solutions.

    Every second entry of content["questions"] (indices 0, 2, 4, ...) is searched for an <h4> heading; the
    first word of that heading, with punctuation removed, is added to relevant_strings_dict[url]. Entries
    without a heading, or whose heading contains no word, are skipped.

    :param content: parsed exercise JSON; its "questions" entry is a list of strings.
    :param url: key in relevant_strings_dict under which the words are collected (must already exist).
    :param relevant_strings_dict: result dictionary, modified in place.
    """
    questions: List[str] = content["questions"]
    punctuation_remover: dict = str.maketrans("", "", string.punctuation)
    # the slice visits every even index, including a trailing entry that has no partner
    for question in questions[::2]:
        match: Union[Match, None] = re.search(r"<h4>(.*)</h4>", question)
        if match is None:
            # re.search returns None when there is no heading; skip instead of failing on match.group
            continue
        match_parts: List[str] = match.group(1).translate(punctuation_remover).split()
        if match_parts:
            relevant_strings_dict[url].add(match_parts[0])


117
def update_exercises() -> Union[Response, ConnexionResponse]:
    """ Gets all static exercises from the frontend code repository and looks for the lemmata in them.

    The archive at Config.STATIC_EXERCISES_REPOSITORY_URL is downloaded and the solution words are collected
    per exercise URL. Every distinct word is written to its own temporary file and all files are handed to
    AnnotationService.get_udpipe. The resulting pairs are stored in NetworkService.exercises and the time of
    the update is recorded in NetworkService.exercises_last_update.

    :return: a 503 problem response if the download failed, otherwise a JSON response containing all exercises.
    """
    # TODO: check last update of the directory before pulling the whole zip archive
    response: Response = requests.get(Config.STATIC_EXERCISES_REPOSITORY_URL, stream=True)
    if not response.ok:
        # with stream=True the connection is only released once the body was consumed or the response closed
        response.close()
        return connexion.problem(
            503, Config.ERROR_TITLE_SERVICE_UNAVAILABLE, Config.ERROR_MESSAGE_SERVICE_UNAVAILABLE)
    relevant_strings_dict: Dict[str, Set[str]] = get_relevant_strings(response)
    # maps the path of each temporary file to its open file descriptor
    # NOTE(review): the descriptors are not closed here; presumably get_udpipe cleans them up - confirm
    file_dict: Dict = {}
    lemma_set: Set[str] = set()
    for url in relevant_strings_dict:
        for word in relevant_strings_dict[url]:
            # a word that occurs in several exercises is written only once
            if word not in lemma_set:
                lemma_set.add(word)
                input_bytes = bytearray(word, encoding='utf-8', errors='strict')
                file_handler, file_path = mkstemp()
                os.write(file_handler, input_bytes)
                file_dict[file_path] = file_handler
    result_string: str = AnnotationService.get_udpipe("", False, file_dict)
    # captures the two alphabetic columns that follow a "1" and a tab
    # (presumably form and lemma of the first token - verify against the UDPipe output format)
    search_results: List[Tuple[str, str]] = re.findall(r"1\t([a-zA-Z]*)\t([a-zA-Z]*)", result_string)
    # index of each first column value in search_results; for duplicates the last occurrence wins
    search_results_dict: Dict[str, int] = {item[0]: i for (i, item) in enumerate(search_results)}
    for url in relevant_strings_dict:
        # the URN points to Cicero's letters to his brother Quintus, 1.1.8-1.1.10
        NetworkService.exercises[url] = StaticExercise(
            solutions=[], urn="urn:cts:latinLit:phi0474.phi058.perseus-lat1:1.1.8-1.1.10")
        for word in relevant_strings_dict[url]:
            # UDpipe cannot handle name abbreviations, so remove the punctuation and only keep the upper case letter
            if word[-1] in string.punctuation:
                word = word[:-1]
            # NOTE(review): raises a KeyError if the word is not among the captured first column values
            NetworkService.exercises[url].solutions.append(list(search_results[search_results_dict[word]]))
    NetworkService.exercises_last_update = datetime.fromtimestamp(time())
    return NetworkService.make_json_response(
        {x: NetworkService.exercises[x].to_dict() for x in NetworkService.exercises})