From a6a855f2ca3f44a80823ed648ef99f8cc45431ae Mon Sep 17 00:00:00 2001 From: Konstantin Schulz Date: Wed, 5 Aug 2020 15:02:15 +0200 Subject: [PATCH] simple database queries are now handled uniformly in the database service --- mc_backend/mcserver/app/__init__.py | 11 +- mc_backend/mcserver/app/api/corpusAPI.py | 12 +- mc_backend/mcserver/app/api/corpusListAPI.py | 11 +- mc_backend/mcserver/app/api/exerciseAPI.py | 13 +- .../mcserver/app/api/exerciseListAPI.py | 8 +- mc_backend/mcserver/app/api/fileAPI.py | 6 +- mc_backend/mcserver/app/api/h5pAPI.py | 10 +- mc_backend/mcserver/app/services/__init__.py | 5 +- .../app/services/annotationService.py | 8 -- .../mcserver/app/services/corpusService.py | 69 ++++++++++- .../mcserver/app/services/databaseService.py | 114 ++++-------------- .../mcserver/app/services/exerciseService.py | 43 +++++++ mc_backend/mocks.py | 1 + mc_backend/tests.py | 44 ++++--- 14 files changed, 197 insertions(+), 158 deletions(-) create mode 100644 mc_backend/mcserver/app/services/exerciseService.py diff --git a/mc_backend/mcserver/app/__init__.py b/mc_backend/mcserver/app/__init__.py index 5f27b2a..478a9c2 100644 --- a/mc_backend/mcserver/app/__init__.py +++ b/mc_backend/mcserver/app/__init__.py @@ -65,10 +65,11 @@ def full_init(app: Flask, cfg: Type[Config] = Config) -> None: """ Fully initializes the application, including logging.""" from mcserver.app.services import DatabaseService DatabaseService.init_db_update_info() - DatabaseService.update_exercises(is_csm=True) - DatabaseService.init_db_corpus() + from mcserver.app.services.corpusService import CorpusService + CorpusService.init_corpora() + from mcserver.app.services import ExerciseService + ExerciseService.update_exercises(is_csm=True) if not cfg.TESTING: - from mcserver.app.services.corpusService import CorpusService CorpusService.init_graphannis_logging() start_updater(app) @@ -130,8 +131,8 @@ def log_exception(sender_app: Flask, exception, **extra): def start_updater(app: Flask) -> Thread: """ Starts a new Thread for to perform updates in the background. """ - from mcserver.app.services import DatabaseService - t = Thread(target=DatabaseService.init_updater, args=(app,)) + from mcserver.app.services import CorpusService + t = Thread(target=CorpusService.init_updater, args=(app,)) t.daemon = True t.start() return t diff --git a/mc_backend/mcserver/app/api/corpusAPI.py b/mc_backend/mcserver/app/api/corpusAPI.py index fb38ed5..dca01d0 100644 --- a/mc_backend/mcserver/app/api/corpusAPI.py +++ b/mc_backend/mcserver/app/api/corpusAPI.py @@ -13,8 +13,8 @@ from mcserver.models_auto import Corpus def delete(cid: int) -> Union[Response, ConnexionResponse]: """The DELETE method for the corpus REST API. It deletes metadata for a specific text.""" - corpus: Corpus = db.session.query(Corpus).filter_by(cid=cid).first() - if corpus is None: + corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True) + if not corpus: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND) db.session.delete(corpus) DatabaseService.commit() @@ -23,16 +23,16 @@ def delete(cid: int) -> Union[Response, ConnexionResponse]: def get(cid: int) -> Union[Response, ConnexionResponse]: """The GET method for the corpus REST API. It provides metadata for a specific text.""" - corpus: Corpus = db.session.query(Corpus).filter_by(cid=cid).first() - if corpus is None: + corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True) + if not corpus: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND) return NetworkService.make_json_response(corpus.to_dict()) def patch(cid: int, **kwargs) -> Union[Response, ConnexionResponse]: """The PUT method for the corpus REST API. It provides updates metadata for a specific text.""" - corpus: Corpus = db.session.query(Corpus).filter_by(cid=cid).first() - if corpus is None: + corpus: Corpus = DatabaseService.query(Corpus, filter_by=dict(cid=cid), first=True) + if not corpus: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND) for k, v in kwargs.items(): if v is not None: diff --git a/mc_backend/mcserver/app/api/corpusListAPI.py b/mc_backend/mcserver/app/api/corpusListAPI.py index ea50dbb..c0709dc 100644 --- a/mc_backend/mcserver/app/api/corpusListAPI.py +++ b/mc_backend/mcserver/app/api/corpusListAPI.py @@ -2,7 +2,6 @@ from connexion.lifecycle import ConnexionResponse from flask import Response from typing import List, Union -from mcserver.app import db from mcserver.app.models import ResourceType from mcserver.app.services import NetworkService, DatabaseService from mcserver.models_auto import Corpus, UpdateInfo @@ -10,11 +9,9 @@ from mcserver.models_auto import Corpus, UpdateInfo def get(last_update_time: int) -> Union[Response, ConnexionResponse]: """The GET method for the corpus list REST API. It provides metadata for all available texts.""" - ui_cts: UpdateInfo - ui_cts = db.session.query(UpdateInfo).filter_by(resource_type=ResourceType.cts_data.name).first() - DatabaseService.commit() - if ui_cts.last_modified_time >= last_update_time / 1000: - corpora: List[Corpus] = db.session.query(Corpus).all() - DatabaseService.commit() + ui_cts: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True) + if ui_cts and ui_cts.last_modified_time >= last_update_time / 1000: + corpora: List[Corpus] = DatabaseService.query(Corpus) return NetworkService.make_json_response([x.to_dict() for x in corpora]) return NetworkService.make_json_response(None) diff --git a/mc_backend/mcserver/app/api/exerciseAPI.py b/mc_backend/mcserver/app/api/exerciseAPI.py index 4abdbb4..916f31f 100644 --- a/mc_backend/mcserver/app/api/exerciseAPI.py +++ b/mc_backend/mcserver/app/api/exerciseAPI.py @@ -10,7 +10,7 @@ from mcserver.app import db from mcserver.app.models import ExerciseType, Solution, ExerciseData, AnnisResponse, Phenomenon, TextComplexity, \ TextComplexityMeasure, ResourceType, ExerciseMC, GraphData from mcserver.app.services import AnnotationService, CorpusService, NetworkService, TextComplexityService, \ - DatabaseService + DatabaseService, ExerciseService from mcserver.config import Config from mcserver.models_auto import Exercise, TExercise, UpdateInfo from openapi.openapi_server.models import ExerciseForm @@ -28,9 +28,8 @@ def adjust_solutions(exercise_data: ExerciseData, exercise_type: str, solutions: def get(eid: str) -> Union[Response, ConnexionResponse]: - exercise: TExercise = db.session.query(Exercise).filter_by(eid=eid).first() - DatabaseService.commit() - if exercise is None: + exercise: TExercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True) + if not exercise: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND) ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn, is_csm=False) if not ar.graph_data.nodes: @@ -67,7 +66,7 @@ def make_new_exercise(conll: str, correct_feedback: str, exercise_type: str, gen # generate a GUID so we can offer the exercise XML as a file download xml_guid = str(uuid.uuid4()) # assemble the mapped exercise data - ed: ExerciseData = AnnotationService.map_graph_data_to_exercise( + ed: ExerciseData = ExerciseService.map_graph_data_to_exercise( graph_data_raw=graph_data_raw, solutions=solutions, xml_guid=xml_guid) # for markWords exercises, add the maximum number of correct solutions to the description instructions += (f"({len(solutions)})" if exercise_type == ExerciseType.markWords.value else "") @@ -107,8 +106,8 @@ def map_exercise_data_to_database(exercise_data: ExerciseData, exercise_type: st work_title=work_title, urn=urn) # add the mapped exercise to the database db.session.add(new_exercise) - ui_exercises: UpdateInfo = db.session.query(UpdateInfo).filter_by( - resource_type=ResourceType.exercise_list.name).first() + ui_exercises: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.exercise_list.name), first=True) ui_exercises.last_modified_time = datetime.utcnow().timestamp() DatabaseService.commit() return new_exercise diff --git a/mc_backend/mcserver/app/api/exerciseListAPI.py b/mc_backend/mcserver/app/api/exerciseListAPI.py index 0b82204..6672607 100644 --- a/mc_backend/mcserver/app/api/exerciseListAPI.py +++ b/mc_backend/mcserver/app/api/exerciseListAPI.py @@ -13,9 +13,8 @@ from openapi.openapi_server.models import MatchingExercise def get(lang: str, frequency_upper_bound: int, last_update_time: int, vocabulary: str = ""): """The GET method for the exercise list REST API. It provides metadata for all available exercises.""" vocabulary_set: Set[str] - ui_exercises: UpdateInfo = db.session.query(UpdateInfo).filter_by( - resource_type=ResourceType.exercise_list.name).first() - DatabaseService.commit() + ui_exercises: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.exercise_list.name), first=True) if ui_exercises.last_modified_time < last_update_time / 1000: return NetworkService.make_json_response([]) try: @@ -28,8 +27,7 @@ def get(lang: str, frequency_upper_bound: int, last_update_time: int, vocabulary lang = Language(lang) except ValueError: lang = Language.English - exercises: List[Exercise] = db.session.query(Exercise).filter_by(language=lang.value) - DatabaseService.commit() + exercises: List[Exercise] = DatabaseService.query(Exercise, filter_by=dict(language=lang.value)) matching_exercises: List[MatchingExercise] = [MatchingExercise.from_dict(x.to_dict()) for x in exercises] if len(vocabulary_set): for exercise in matching_exercises: diff --git a/mc_backend/mcserver/app/api/fileAPI.py b/mc_backend/mcserver/app/api/fileAPI.py index 7c6400b..e18175f 100644 --- a/mc_backend/mcserver/app/api/fileAPI.py +++ b/mc_backend/mcserver/app/api/fileAPI.py @@ -17,7 +17,8 @@ from mcserver.models_auto import Exercise, UpdateInfo, LearningResult def clean_tmp_folder(): """ Cleans the files directory regularly. """ - ui_file: UpdateInfo = db.session.query(UpdateInfo).filter_by(resource_type=ResourceType.file_api_clean.name).first() + ui_file: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.file_api_clean.name), first=True) ui_datetime: datetime = datetime.fromtimestamp(ui_file.last_modified_time) if (datetime.utcnow() - ui_datetime).total_seconds() > Config.INTERVAL_FILE_DELETE: for file in [x for x in os.listdir(Config.TMP_DIRECTORY) if x not in ".gitignore"]: @@ -35,8 +36,7 @@ def clean_tmp_folder(): def get(id: str, type: FileType, solution_indices: List[int]) -> Union[ETagResponseMixin, ConnexionResponse]: """The GET method for the file REST API. It provides the URL to download a specific file.""" clean_tmp_folder() - exercise: Exercise = db.session.query(Exercise).filter_by(eid=id).first() - DatabaseService.commit() + exercise: Exercise = DatabaseService.query(Exercise, filter_by=dict(eid=id), first=True) file_name: str = id + "." + str(type) mime_type: str = MimeType[type].value if exercise is None: diff --git a/mc_backend/mcserver/app/api/h5pAPI.py b/mc_backend/mcserver/app/api/h5pAPI.py index b978d8c..e54b8db 100644 --- a/mc_backend/mcserver/app/api/h5pAPI.py +++ b/mc_backend/mcserver/app/api/h5pAPI.py @@ -27,9 +27,8 @@ def determine_language(lang: str) -> Language: def get(eid: str, lang: str, solution_indices: List[int]) -> Union[Response, ConnexionResponse]: """ The GET method for the H5P REST API. It provides JSON templates for client-side H5P exercise layouts. """ language: Language = determine_language(lang) - exercise: Exercise = db.session.query(Exercise).filter_by(eid=eid).first() - DatabaseService.commit() - if exercise is None: + exercise: Exercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True) + if not exercise: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND) text_field_content: str = get_text_field_content(exercise, solution_indices) if not text_field_content: @@ -106,9 +105,8 @@ def post(h5p_data: dict): """ The POST method for the H5P REST API. It offers client-side H5P exercises for download as ZIP archives. """ h5p_form: H5PForm = H5PForm.from_dict(h5p_data) language: Language = determine_language(h5p_form.lang) - exercise: Exercise = db.session.query(Exercise).filter_by(eid=h5p_form.eid).first() - DatabaseService.commit() - if exercise is None: + exercise: Exercise = DatabaseService.query(Exercise, filter_by=dict(eid=h5p_form.eid), first=True) + if not exercise: return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND) text_field_content: str = get_text_field_content(exercise, h5p_form.solution_indices) if not text_field_content: diff --git a/mc_backend/mcserver/app/services/__init__.py b/mc_backend/mcserver/app/services/__init__.py index a504d53..81c54b3 100644 --- a/mc_backend/mcserver/app/services/__init__.py +++ b/mc_backend/mcserver/app/services/__init__.py @@ -4,7 +4,8 @@ from flask import Blueprint bp = Blueprint("services", __name__) -# the order of imports is important, please don't change it if you don't know what you are doing +# the order of imports is very important, please don't change it if you don't know what you are doing +from mcserver.app.services.databaseService import DatabaseService from mcserver.app.services.textService import TextService from mcserver.app.services.xmlService import XMLservice from mcserver.app.services.fileService import FileService @@ -14,4 +15,4 @@ from mcserver.app.services.customCorpusService import CustomCorpusService from mcserver.app.services.frequencyService import FrequencyService from mcserver.app.services.corpusService import CorpusService from mcserver.app.services.textComplexityService import TextComplexityService -from mcserver.app.services.databaseService import DatabaseService +from mcserver.app.services.exerciseService import ExerciseService diff --git a/mc_backend/mcserver/app/services/annotationService.py b/mc_backend/mcserver/app/services/annotationService.py index 58939c3..d17dda0 100644 --- a/mc_backend/mcserver/app/services/annotationService.py +++ b/mc_backend/mcserver/app/services/annotationService.py @@ -239,14 +239,6 @@ class AnnotationService: AnnotationService.sort_nodes(graph_data) return graph_data - @staticmethod - def map_graph_data_to_exercise(graph_data_raw: Dict, xml_guid: str, solutions: List[Solution]): - """ Creates an ExerciseData object from the separate parts. """ - # create the basis for the download URL - xml_url = "/" + xml_guid - graph_data: GraphData = AnnotationService.map_graph_data(graph_data_raw) - return ExerciseData(graph=graph_data, solutions=solutions, uri=xml_url) - @staticmethod def map_node(node: dict): """ Maps a node dictionary to the native NodeMC class. """ diff --git a/mc_backend/mcserver/app/services/corpusService.py b/mc_backend/mcserver/app/services/corpusService.py index 7942af5..554b74e 100644 --- a/mc_backend/mcserver/app/services/corpusService.py +++ b/mc_backend/mcserver/app/services/corpusService.py @@ -1,10 +1,12 @@ import sys +from datetime import datetime import rapidjson as json import os -from typing import List, Union, Set, Tuple +from typing import List, Union, Set, Tuple, Dict import requests from MyCapytain.retrievers.cts5 import HttpCtsRetriever from conllu import TokenList +from flask import Flask from graphannis import CAPI, ffi from graphannis.cs import ResultOrder from graphannis.errors import consume_errors, NoSuchCorpus, GraphANNISException @@ -13,13 +15,14 @@ from lxml import etree from networkx import graph, MultiDiGraph from networkx.readwrite import json_graph from requests import HTTPError +from sqlalchemy.exc import OperationalError from mcserver.app import db from mcserver.app.models import CitationLevel, GraphData, Solution, ExerciseType, Phenomenon, AnnisResponse, CorpusMC, \ - make_solution_element_from_salt_id, FrequencyItem + make_solution_element_from_salt_id, FrequencyItem, ResourceType from mcserver.app.services import AnnotationService, XMLservice, TextService, FileService, FrequencyService, \ - CustomCorpusService + CustomCorpusService, DatabaseService from mcserver.config import Config -from mcserver.models_auto import Corpus +from mcserver.models_auto import Corpus, UpdateInfo class CorpusService: @@ -51,6 +54,24 @@ class CorpusService: new_corpus.uri = "/{0}".format(new_corpus.cid) db.session.commit() + @staticmethod + def check_corpus_list_age(app: Flask) -> None: + """ Checks whether the corpus list needs to be updated. If yes, it performs the update. """ + app.logger.info("Corpus update started.") + ui_cts: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True) + DatabaseService.commit() + if ui_cts is None: + app.logger.info("UpdateInfo not available!") + return + else: + ui_datetime: datetime = datetime.fromtimestamp(ui_cts.last_modified_time) + if (datetime.utcnow() - ui_datetime).total_seconds() > Config.INTERVAL_CORPUS_UPDATE: + CorpusService.update_corpora() + ui_cts.last_modified_time = datetime.utcnow().timestamp() + DatabaseService.commit() + app.logger.info("Corpus update completed.") + @staticmethod def find_matches(urn: str, aql: str, is_csm: bool = False) -> List[str]: """ Finds matches for a given URN and AQL and returns the corresponding node IDs. """ @@ -281,6 +302,30 @@ class CorpusService: ctx_left=ctx_left, ctx_right=ctx_right)) return AnnisResponse.from_dict(json.loads(response.text)) + @staticmethod + def init_corpora() -> None: + """Initializes the corpus list if it is not already there and up to date.""" + if DatabaseService.has_table(Config.DATABASE_TABLE_CORPUS): + CorpusService.existing_corpora = DatabaseService.query(Corpus) + urn_dict: Dict[str, int] = {v.source_urn: i for i, v in enumerate(CorpusService.existing_corpora)} + for cc in CustomCorpusService.custom_corpora: + if cc.corpus.source_urn in urn_dict: + existing_corpus: Corpus = CorpusService.existing_corpora[urn_dict[cc.corpus.source_urn]] + CorpusService.update_corpus( + title_value=cc.corpus.title, urn=cc.corpus.source_urn, author=cc.corpus.author, + corpus_to_update=existing_corpus, citation_levels=[ + cc.corpus.citation_level_1, cc.corpus.citation_level_2, + cc.corpus.citation_level_3]) + else: + citation_levels: List[CitationLevel] = [] + for cl in [cc.corpus.citation_level_1, cc.corpus.citation_level_2, + cc.corpus.citation_level_3]: + citation_levels += [cl] if cl != CitationLevel.default else [] + CorpusService.add_corpus( + title_value=cc.corpus.title, urn=cc.corpus.source_urn, + group_name_value=cc.corpus.author, citation_levels=citation_levels) + CorpusService.existing_corpora = DatabaseService.query(Corpus) + @staticmethod def init_graphannis_logging() -> None: """Initializes logging for the graphannis backend.""" @@ -289,6 +334,21 @@ class CorpusService: err) # Debug consume_errors(err) + @staticmethod + def init_updater(app: Flask) -> None: + """Initializes a thread that regularly performs updates.""" + app.app_context().push() + while True: + try: + CorpusService.check_corpus_list_age(app) + except OperationalError: + pass + import gc + gc.collect() + from time import sleep + # sleep for 1 hour + sleep(Config.INTERVAL_CORPUS_AGE_CHECK) + @staticmethod def is_urn(maybe_urn: str): """ Checks if the string represents a URN. """ @@ -338,7 +398,6 @@ class CorpusService: def update_corpora(): """Checks the remote repositories for new corpora to be included in our database.""" CorpusService.existing_corpora = db.session.query(Corpus).all() - db.session.commit() resolver: HttpCtsRetriever = HttpCtsRetriever(Config.CTS_API_BASE_URL) # check the appropriate literature for the desired author resp: str = resolver.getCapabilities(urn="urn:cts:latinLit") # "urn:cts:greekLit" for Greek diff --git a/mc_backend/mcserver/app/services/databaseService.py b/mc_backend/mcserver/app/services/databaseService.py index 8e37ddb..f4c6888 100644 --- a/mc_backend/mcserver/app/services/databaseService.py +++ b/mc_backend/mcserver/app/services/databaseService.py @@ -1,37 +1,15 @@ from datetime import datetime -from typing import List, Dict - -from flask import Flask +from typing import Union, Any from flask_migrate import stamp, upgrade -import rapidjson as json from sqlalchemy.exc import OperationalError, InvalidRequestError - +from sqlalchemy.orm import Query from mcserver.app import db -from mcserver.app.models import CitationLevel, ResourceType, TextComplexityMeasure, AnnisResponse, TextComplexity -from mcserver.app.services import CorpusService, CustomCorpusService, TextComplexityService +from mcserver.app.models import ResourceType from mcserver.config import Config -from mcserver.models_auto import Corpus, Exercise, UpdateInfo +from mcserver.models_auto import Corpus, Exercise, UpdateInfo, LearningResult class DatabaseService: - - @staticmethod - def check_corpus_list_age(app: Flask) -> None: - """ Checks whether the corpus list needs to be updated. If yes, it performs the update. """ - app.logger.info("Corpus update started.") - ui_cts: UpdateInfo = db.session.query(UpdateInfo).filter_by(resource_type=ResourceType.cts_data.name).first() - DatabaseService.commit() - if ui_cts is None: - app.logger.info("UpdateInfo not available!") - return - else: - ui_datetime: datetime = datetime.fromtimestamp(ui_cts.last_modified_time) - if (datetime.utcnow() - ui_datetime).total_seconds() > Config.INTERVAL_CORPUS_UPDATE: - CorpusService.update_corpora() - ui_cts.last_modified_time = datetime.utcnow().timestamp() - DatabaseService.commit() - app.logger.info("Corpus update completed.") - @staticmethod def commit(): """Commits the last action to the database and, if it fails, rolls back the current session.""" @@ -41,43 +19,25 @@ class DatabaseService: db.session.rollback() raise + @staticmethod + def has_table(table: str) -> bool: + """Checks if a table is present in the database or not.""" + return db.engine.dialect.has_table(db.engine, table) + @staticmethod def init_db_alembic() -> None: """In Docker, the alembic version is not initially written to the database, so we need to set it manually.""" - if not db.engine.dialect.has_table(db.engine, Config.DATABASE_TABLE_ALEMBIC): + if not DatabaseService.has_table(Config.DATABASE_TABLE_ALEMBIC): stamp(directory=Config.MIGRATIONS_DIRECTORY) upgrade(directory=Config.MIGRATIONS_DIRECTORY) - @staticmethod - def init_db_corpus() -> None: - """Initializes the corpus list if it is not already there and up to date.""" - if db.engine.dialect.has_table(db.engine, Config.DATABASE_TABLE_CORPUS): - CorpusService.existing_corpora = db.session.query(Corpus).all() - DatabaseService.commit() - urn_dict: Dict[str, int] = {v.source_urn: i for i, v in enumerate(CorpusService.existing_corpora)} - for cc in CustomCorpusService.custom_corpora: - if cc.corpus.source_urn in urn_dict: - existing_corpus: Corpus = CorpusService.existing_corpora[urn_dict[cc.corpus.source_urn]] - CorpusService.update_corpus(title_value=cc.corpus.title, urn=cc.corpus.source_urn, - author=cc.corpus.author, corpus_to_update=existing_corpus, - citation_levels=[cc.corpus.citation_level_1, cc.corpus.citation_level_2, - cc.corpus.citation_level_3]) - else: - citation_levels: List[CitationLevel] = [] - for cl in [cc.corpus.citation_level_1, cc.corpus.citation_level_2, cc.corpus.citation_level_3]: - citation_levels += [cl] if cl != CitationLevel.default else [] - CorpusService.add_corpus(title_value=cc.corpus.title, urn=cc.corpus.source_urn, - group_name_value=cc.corpus.author, - citation_levels=citation_levels) - CorpusService.existing_corpora = db.session.query(Corpus).all() - DatabaseService.commit() - @staticmethod def init_db_update_info() -> None: """Initializes update entries for all resources that have not yet been created.""" - if db.engine.dialect.has_table(db.engine, Config.DATABASE_TABLE_UPDATEINFO): + if DatabaseService.has_table(Config.DATABASE_TABLE_UPDATEINFO): for rt in ResourceType: - ui_cts: UpdateInfo = db.session.query(UpdateInfo).filter_by(resource_type=rt.name).first() + ui_cts: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=rt.name), first=True) if ui_cts is None: ui_cts = UpdateInfo.from_dict(resource_type=rt.name, last_modified_time=1, created_time=datetime.utcnow().timestamp()) @@ -85,38 +45,16 @@ class DatabaseService: DatabaseService.commit() @staticmethod - def init_updater(app: Flask) -> None: - """Initializes a thread that regularly performs updates.""" - app.app_context().push() - while True: - try: - DatabaseService.check_corpus_list_age(app) - except OperationalError: - pass - import gc - gc.collect() - from time import sleep - # sleep for 1 hour - sleep(Config.INTERVAL_CORPUS_AGE_CHECK) - - @staticmethod - def update_exercises(is_csm: bool) -> None: - """Deletes old exercises.""" - if db.engine.dialect.has_table(db.engine, Config.DATABASE_TABLE_EXERCISE): - exercises: List[Exercise] = db.session.query(Exercise).all() - now: datetime = datetime.utcnow() - for exercise in exercises: - exercise_datetime: datetime = datetime.fromtimestamp(exercise.last_access_time) - # delete exercises that have not been accessed for a while, are not compatible anymore, or contain - # corrupted / empty data - if (now - exercise_datetime).total_seconds() > Config.INTERVAL_EXERCISE_DELETE or \ - not exercise.urn or not json.loads(exercise.solutions): - db.session.delete(exercise) - DatabaseService.commit() - # manually add text complexity measures for old exercises - elif not exercise.text_complexity: - ar: AnnisResponse = CorpusService.get_corpus(exercise.urn, is_csm=is_csm) - tc: TextComplexity = TextComplexityService.text_complexity(TextComplexityMeasure.all.name, - exercise.urn, is_csm, ar.graph_data) - exercise.text_complexity = tc.all - DatabaseService.commit() + def query(table: Union[Corpus, Exercise, LearningResult, UpdateInfo], filter_by: dict = None, + first: bool = False) -> Any: + """Executes a query on the database and rolls back the session if errors occur.""" + try: + ret_val: Query = db.session.query(table) + if filter_by: + ret_val = ret_val.filter_by(**filter_by) + ret_val = ret_val.first() if first else ret_val.all() + DatabaseService.commit() + return ret_val + except InvalidRequestError: + db.session.rollback() + return None diff --git a/mc_backend/mcserver/app/services/exerciseService.py b/mc_backend/mcserver/app/services/exerciseService.py new file mode 100644 index 0000000..fada64f --- /dev/null +++ b/mc_backend/mcserver/app/services/exerciseService.py @@ -0,0 +1,43 @@ +from datetime import datetime +from typing import List, Dict +import rapidjson as json +from mcserver import Config +from mcserver.app import db +from mcserver.app.models import AnnisResponse, TextComplexity, TextComplexityMeasure, GraphData, ExerciseData +from mcserver.app.services import DatabaseService, CorpusService, TextComplexityService, AnnotationService +from mcserver.models_auto import Exercise +from openapi.openapi_server.models import Solution + + +class ExerciseService: + """ Service for creating new and managing old exercises. """ + + @staticmethod + def map_graph_data_to_exercise(graph_data_raw: Dict, xml_guid: str, solutions: List[Solution]): + """ Creates an ExerciseData object from the separate parts. """ + # create the basis for the download URL + xml_url = "/" + xml_guid + graph_data: GraphData = AnnotationService.map_graph_data(graph_data_raw) + return ExerciseData(graph=graph_data, solutions=solutions, uri=xml_url) + + @staticmethod + def update_exercises(is_csm: bool) -> None: + """Deletes old exercises.""" + if DatabaseService.has_table(Config.DATABASE_TABLE_EXERCISE): + exercises: List[Exercise] = DatabaseService.query(Exercise) + now: datetime = datetime.utcnow() + for exercise in exercises: + exercise_datetime: datetime = datetime.fromtimestamp(exercise.last_access_time) + # delete exercises that have not been accessed for a while, are not compatible anymore, or contain + # corrupted / empty data + if (now - exercise_datetime).total_seconds() > Config.INTERVAL_EXERCISE_DELETE or \ + not exercise.urn or not json.loads(exercise.solutions): + db.session.delete(exercise) + DatabaseService.commit() + # manually add text complexity measures for old exercises + elif not exercise.text_complexity: + ar: AnnisResponse = CorpusService.get_corpus(exercise.urn, is_csm=is_csm) + tc: TextComplexity = TextComplexityService.text_complexity( + TextComplexityMeasure.all.name, exercise.urn, is_csm, ar.graph_data) + exercise.text_complexity = tc.all + DatabaseService.commit() diff --git a/mc_backend/mocks.py b/mc_backend/mocks.py index 0cbd81f..5fe577c 100644 --- a/mc_backend/mocks.py +++ b/mc_backend/mocks.py @@ -38,6 +38,7 @@ class MockQuery: self.ui: UpdateInfo = ui def all(self): + # DO NOT MAKE THIS POINT TO THE DATABASE SERVICE, IT WILL BE MOCKED ANYWAY return db.session.query(Corpus).all() def filter_by(self, **kwargs): diff --git a/mc_backend/tests.py b/mc_backend/tests.py index fb4d65e..732e9d2 100644 --- a/mc_backend/tests.py +++ b/mc_backend/tests.py @@ -26,7 +26,7 @@ from gensim.models import Word2Vec from lxml import etree from networkx import MultiDiGraph, Graph from requests import HTTPError -from sqlalchemy.exc import OperationalError +from sqlalchemy.exc import OperationalError, InvalidRequestError from sqlalchemy.orm import session from werkzeug.wrappers import Response @@ -40,7 +40,7 @@ from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseDa VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, TextComplexity, Dependency, PartOfSpeech, \ Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, DatabaseService, \ - XMLservice, TextService, FrequencyService + XMLservice, TextService, FrequencyService, ExerciseService from mcserver.config import TestingConfig, Config from mcserver.models_auto import Corpus, Exercise, UpdateInfo, LearningResult from mocks import Mocks, MockResponse, MockW2V, MockQuery, TestHelper @@ -140,14 +140,14 @@ class McTestCase(unittest.TestCase): ui: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, last_modified_time=lmt.timestamp(), created_time=1) mock.session.query.return_value = MockQuery(ui) - response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_CORPORA, - query_string=dict(last_update_time=lut)) + response: Response = Mocks.app_dict[self.class_name].client.get( + TestingConfig.SERVER_URI_CORPORA, query_string=dict(last_update_time=lut)) data_json = json.loads(response.get_data()) if data_json: result = [x.to_dict() for x in result] self.assertEqual(data_json, result) - with patch.object(mcserver.app.api.corpusListAPI, "db") as mock_db: + with patch.object(mcserver.app.services.databaseService, "db") as mock_db: expect_result(self, mock_db, str(int(datetime.utcnow().timestamp() * 1000)), None, datetime.fromtimestamp(0)) db.session.add_all(Mocks.corpora) @@ -299,7 +299,7 @@ class McTestCase(unittest.TestCase): learning_result: str = Mocks.xapi_json_string Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_FILE, headers=Mocks.headers_form_data, data=dict(learning_result=learning_result)) - lrs: List[LearningResult] = db.session.query(LearningResult).all() + lrs: List[LearningResult] = DatabaseService.query(LearningResult) self.assertEqual(len(lrs), 1) data_dict: dict = dict(file_type=FileType.XML, urn=Mocks.urn_custom, html_content="") response: Response = Mocks.app_dict[self.class_name].client.post( @@ -364,11 +364,11 @@ class McTestCase(unittest.TestCase): def test_api_kwic_post(self): """ Posts an AQL query to create a KWIC visualization in SVG format. """ - ed1: ExerciseData = AnnotationService.map_graph_data_to_exercise( + ed1: ExerciseData = ExerciseService.map_graph_data_to_exercise( Mocks.annis_response_dict["graph_data_raw"], "", [Solution(target=make_solution_element_from_salt_id( 'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159692tok1'))]) - ed2: ExerciseData = AnnotationService.map_graph_data_to_exercise( + ed2: ExerciseData = ExerciseService.map_graph_data_to_exercise( Mocks.annis_response_dict["graph_data_raw"], "", [Solution(target=make_solution_element_from_salt_id( 'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159695tok10'))]) @@ -628,7 +628,7 @@ class McTestCase(unittest.TestCase): exercise.partially_correct_feedback, exercise.correct_feedback, exercise.instructions, exercise.exercise_type_translation, exercise.exercise_type, exercise.solutions, exercise.eid] self.assertEqual(expected_values, actual_values) - exercise_from_db: Exercise = db.session.query(Exercise).one() + exercise_from_db: Exercise = DatabaseService.query(Exercise, first=True) self.assertEqual(exercise, exercise_from_db) db.session.query(Exercise).delete() db.session.query(UpdateInfo).delete() @@ -754,8 +754,9 @@ class CsmTestCase(unittest.TestCase): db.session.add(ui_cts) DatabaseService.commit() utc_now: datetime = datetime.utcnow() - DatabaseService.check_corpus_list_age(Mocks.app_dict[self.class_name].app) - ui_cts: UpdateInfo = db.session.query(UpdateInfo).filter_by(resource_type=ResourceType.cts_data.name).first() + CorpusService.check_corpus_list_age(Mocks.app_dict[self.class_name].app) + ui_cts: UpdateInfo = DatabaseService.query( + UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True) self.assertGreater(ui_cts.last_modified_time, utc_now.timestamp()) db.session.query(UpdateInfo).delete() @@ -834,7 +835,7 @@ class CsmTestCase(unittest.TestCase): def test_init_updater(self): """Initializes the corpus list updater.""" - with patch.object(DatabaseService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")): + with patch.object(CorpusService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")): ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, last_modified_time=1, created_time=1) db.session.add(ui_cts) @@ -1065,8 +1066,9 @@ class CommonTestCase(unittest.TestCase): old_corpus.source_urn = cc.corpus.source_urn McTestCase.add_corpus(old_corpus) del old_corpus - DatabaseService.init_db_corpus() - corpus: Corpus = db.session.query(Corpus).filter_by(source_urn=cc.corpus.source_urn).first() + CorpusService.init_corpora() + corpus: Corpus = DatabaseService.query( + Corpus, filter_by=dict(source_urn=cc.corpus.source_urn), first=True) self.assertEqual(corpus.title, cc.corpus.title) db.session.query(Corpus).delete() db.session.query(UpdateInfo).delete() @@ -1181,6 +1183,16 @@ class CommonTestCase(unittest.TestCase): session.make_transient(Mocks.corpora[0]) session.make_transient(Mocks.exercise) + def test_query(self) -> None: + """Executes a query on the database and rolls back the session if errors occur.""" + + def raise_error(table: Any): + raise InvalidRequestError() + + with patch.object(mcserver.app.services.databaseService, "db") as db_mock: + db_mock.session.query.side_effect = raise_error + self.assertEqual(DatabaseService.query(Corpus), None) + def test_sort_nodes(self): """Sorts the nodes according to the ordering links, i.e. by their tokens' occurrence in the text.""" old_graph_data: GraphData = GraphData(nodes=[], links=[]) @@ -1217,8 +1229,8 @@ class CommonTestCase(unittest.TestCase): with patch.object(mcserver.app.services.textComplexityService.requests, "post", return_value=MockResponse(Mocks.text_complexity_json_string)): with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response): - DatabaseService.update_exercises(False) - exercises = db.session.query(Exercise).all() + ExerciseService.update_exercises(False) + exercises = DatabaseService.query(Exercise) self.assertEqual(len(exercises), 1) self.assertEqual(exercises[0].text_complexity, 54.53) db.session.query(Exercise).delete() -- GitLab