diff --git a/coverage_local.sh b/coverage_local.sh
index 5b398e71b07a19f2981efe1c7d61514b09d4ad2a..a052db9456f00924fa1f50789b203a7a8caa31b2 100755
--- a/coverage_local.sh
+++ b/coverage_local.sh
@@ -1,5 +1,5 @@
 docker-compose build
-docker-compose run --rm --entrypoint="npm run test-ci" mc_frontend >ci_frontend.log
+docker-compose run --rm --entrypoint="npm run test-ci" mc_frontend > ci_frontend.log
 docker-compose run --rm --entrypoint="./coverage_backend.sh" mcserver > ci_backend.log
 ./coverage_ci.sh
 cat coverage.log
diff --git a/docker-compose.csm.yml b/docker-compose.csm.yml
deleted file mode 100644
index 5096ea06b76e5860e7fd9561ea7a9c5199e5e87d..0000000000000000000000000000000000000000
--- a/docker-compose.csm.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-version: '3.7'
-
-services:
-  csm:
-    depends_on:
-      - mcserver
diff --git a/docker-compose.mcserver.yml b/docker-compose.mcserver.yml
deleted file mode 100644
index 2e29b2a10f11a4a44e81d018c0c61da040998ee3..0000000000000000000000000000000000000000
--- a/docker-compose.mcserver.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-version: '3.7'
-
-services:
-  mcserver:
-    depends_on:
-      - csm
diff --git a/docker-compose.yml b/docker-compose.yml
index 31a131fc9802f73e705107bc4c3e7dd435276c63..94b74fe2bcee02f161ca168d134b28632aba3672 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,21 +1,6 @@
 version: '3.7'
 
 services:
-  csm:
-    build:
-      context: ./mc_backend
-      dockerfile: Dockerfile
-    command: /home/mc/venv/bin/gunicorn -c csm/gunicorn_config.py run_csm:app
-    depends_on:
-      - db
-    environment:
-      - FLASK_APP=run_csm.py
-      - IS_THIS_A_DOCKER_CONTAINER=Yes
-      - PYTHONPATH=/home/mc
-    ports:
-      - "6555:6555"
-    restart: always
-    stdin_open: true
   db:
     image: postgres
     environment:
diff --git a/mc_backend/README.md b/mc_backend/README.md
index e29a4d7abf7db6d7ab99e39ee391b064085c3a67..36892c83976cb3e07a78ea4190e114b84915b54b 100644
--- a/mc_backend/README.md
+++ b/mc_backend/README.md
@@ -1,7 +1,7 @@
 # Installing the backend via command line
 1. Set up a PostgreSQL database manually (https://www.postgresql.org/download/). If necessary, adjust the URI in your .env file located at `mcserver/.env`.
 2. Run `pip install -r requirements.txt`.
-3. Run `python app.py` and `python run_csm.py` as separate processes.
+3. Run `python app.py`.
 
 ## Endpoints
 The default starting point for the API will be at http://localhost:5000/mc/api/v1.0/corpora .
diff --git a/mc_backend/csm/.gitignore b/mc_backend/csm/.gitignore
deleted file mode 100644
index 09a2a6dfcab3f86056e9ef76d6a84008826de8e6..0000000000000000000000000000000000000000
--- a/mc_backend/csm/.gitignore
+++ /dev/null
@@ -1,13 +0,0 @@
-# git ls-files --others --exclude-from=.git/info/exclude
-# Lines that start with '#' are comments.
-# For a project mostly in C, the following would be a good set of
-# exclude patterns (uncomment them if you want to use them):
-# *.[oa]
-# *~
-*.pyc
-*.log*
-*.env
-*.db
-*.db-journal
-*.coverage
-/env
diff --git a/mc_backend/csm/__init__.py b/mc_backend/csm/__init__.py
deleted file mode 100644
index d600973314472f7dbc310fbf2635d4ccc4b8a422..0000000000000000000000000000000000000000
--- a/mc_backend/csm/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""The main application: Machina Callida.
-
-It is a server-side backend for retrieving Latin texts and
-generating language exercises for them."""
-import sys
-from typing import Type
-from flask import Flask
-from csm.app import create_csm_app
-from mcserver.config import Config, ProductionConfig, DevelopmentConfig, TestingConfig
-
-
-def get_app() -> Flask:
-    return create_csm_app(get_cfg())
-
-
-def get_cfg() -> Type[Config]:
-    return ProductionConfig if Config.IS_PRODUCTION else (
-        TestingConfig if len(sys.argv) > 1 and sys.argv[1] == Config.TEST_FLAG else DevelopmentConfig)
-
-
-def run_app() -> None:
-    cfg: Type[Config] = get_cfg()
-    get_app().run(host=cfg.HOST_IP_CSM, port=cfg.CORPUS_STORAGE_MANAGER_PORT, use_reloader=False)
-
-
-if __name__ == "__main__":
-    # reloader has to be disabled because of a bug with Flask and multiprocessing
-    run_app()
diff --git a/mc_backend/csm/__main__.py b/mc_backend/csm/__main__.py
deleted file mode 100644
index f1ab8bdd2d142a3936c55cca8f0bdff69b60b3b4..0000000000000000000000000000000000000000
--- a/mc_backend/csm/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from csm import get_app, get_cfg
-
-get_app().run(host=get_cfg().HOST_IP_CSM, port=get_cfg().HOST_PORT, use_reloader=False)
diff --git a/mc_backend/csm/app/__init__.py b/mc_backend/csm/app/__init__.py
deleted file mode 100644
index 36f8cbadef4f6c5e76389dc96fa82ce0615da301..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from typing import Type
-
-from flask import Flask
-from graphannis.cs import CorpusStorageManager
-
-from mcserver import Config
-from mcserver.app import init_app_common, init_logging
-
-
-def create_csm_app(cfg: Type[Config] = Config) -> Flask:
-    """Creates a new Flask app that represents a Corpus Storage Manager."""
-
-    Config.CORPUS_STORAGE_MANAGER = CorpusStorageManager(Config.GRAPH_DATABASE_DIR)
-    app_csm: Flask = init_app_common(cfg=cfg, is_csm=True)
-    from csm.app.api import bp
-    app_csm.register_blueprint(bp)
-    init_logging(app_csm, Config.LOG_PATH_CSM)
-    return app_csm
diff --git a/mc_backend/csm/app/api/__init__.py b/mc_backend/csm/app/api/__init__.py
deleted file mode 100644
index 2e0a824a85c7ef80aed8db6a6b72256a6723cdd7..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""The API blueprint. Register it on the main application to enable the REST API for text retrieval."""
-from flask import Blueprint
-from flask_restful import Api
-
-from mcserver import Config
-
-bp = Blueprint("api", __name__)
-api = Api(bp)
-
-from . import frequencyAPI, textcomplexityAPI
-from csm.app.api.annisFindAPI import AnnisFindAPI
-from csm.app.api.corpusStorageManagerAPI import CorpusStorageManagerAPI
-from csm.app.api.subgraphAPI import SubgraphAPI
-
-api.add_resource(AnnisFindAPI, Config.SERVER_URI_ANNIS_FIND, endpoint="find")
-api.add_resource(CorpusStorageManagerAPI, Config.SERVER_URI_CSM, endpoint="csm")
-api.add_resource(SubgraphAPI, Config.SERVER_URI_CSM_SUBGRAPH, endpoint="subgraph")
diff --git a/mc_backend/csm/app/api/annisFindAPI.py b/mc_backend/csm/app/api/annisFindAPI.py
deleted file mode 100644
index 90d96af0caa2a34d19599060546e1bfd6ba8b935..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/annisFindAPI.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import flask
-from flask_restful import Resource
-from flask_restful.reqparse import RequestParser
-
-from mcserver.app.services import NetworkService, CorpusService
-
-
-class AnnisFindAPI(Resource):
-    def __init__(self):
-        self.reqparse: RequestParser = NetworkService.base_request_parser.copy()
-        self.reqparse.add_argument("aql", type=str, required=True, location="form", help="No AQL provided")
-        self.reqparse.add_argument("urn", type=str, required=True, default="", location="form", help="No URN provided")
-        super(AnnisFindAPI, self).__init__()
-
-    def get(self):
-        """ Returns matches from ANNIS for a given CTS URN and AQL. """
-        # get request arguments
-        args: dict = flask.request.args
-        urn: str = args["urn"]
-        aql: str = args["aql"]
-        return NetworkService.make_json_response(CorpusService.find_matches(urn, aql, is_csm=True))
diff --git a/mc_backend/csm/app/api/corpusStorageManagerAPI.py b/mc_backend/csm/app/api/corpusStorageManagerAPI.py
deleted file mode 100644
index 4200af2156a8954653fccebfcafced0b03eff2eb..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/corpusStorageManagerAPI.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import json
-from json import JSONDecodeError
-from typing import Dict, List
-
-import flask
-from conllu import TokenList
-from flask_restful import Resource, abort
-from flask_restful.reqparse import RequestParser
-
-from mcserver.app.models import ExerciseType, Phenomenon, AnnisResponse
-from mcserver.app.services import CorpusService, NetworkService
-
-
-class CorpusStorageManagerAPI(Resource):
-    """Represents an API for the Corpus Storage Manager.
-
-    It manages the database and everything corpus-related."""
-
-    def __init__(self):
-        self.reqparse: RequestParser = NetworkService.base_request_parser.copy()
-        self.reqparse.add_argument("title", type=str, required=True, location="data", help="No title provided")
-        self.reqparse.add_argument("annotations", required=True, location="data",
-                                   help="No annotations provided")
-        self.reqparse.add_argument("aqls", required=True, location="data", help="No AQLs provided",
-                                   action="append")
-        self.reqparse.add_argument("exercise_type", type=str, required=True, location="data",
-                                   help="No exercise type provided")
-        self.reqparse.add_argument("search_phenomena", type=str, required=False, location="data",
-                                   help="No search phenomena provided")
-        self.reqparse.add_argument("urn", type=str, required=False, help="No text identifier provided")
-        super(CorpusStorageManagerAPI, self).__init__()
-
-    def get(self):
-        """ Returns graph data for a given CTS URN. """
-        # get request arguments
-        args: Dict = flask.request.args
-        cts_urn: str = args["urn"]
-        ar: AnnisResponse = CorpusService.get_corpus(cts_urn=cts_urn, is_csm=True)
-        if not ar.graph_data.nodes:
-            abort(404)
-        return NetworkService.make_json_response(ar.to_dict())
-
-    def post(self):
-        """Given the relevant corpus data, gives back search results as graph data."""
-        args: dict = {}
-        try:
-            args = json.loads(flask.request.data.decode("utf-8"))
-        except JSONDecodeError:
-            abort(400)
-        title: str = args["title"]
-        annotations_or_urn: str = args["annotations"]
-        aqls: List[str] = args["aqls"]
-        exercise_type: ExerciseType = ExerciseType[args["exercise_type"]]
-        search_phenomena: List[Phenomenon] = [Phenomenon().__getattribute__(x.upper()) for x in
-                                              args["search_phenomena"]]
-        conll: List[TokenList] = CorpusService.get_annotations_from_string(annotations_or_urn)
-        ret_val: dict = CorpusService.process_corpus_data(title, conll, aqls, exercise_type, search_phenomena)
-        # serialize the results to json
-        return NetworkService.make_json_response(ret_val)
diff --git a/mc_backend/csm/app/api/frequencyAPI.py b/mc_backend/csm/app/api/frequencyAPI.py
deleted file mode 100644
index e7fcf7a58e65ae7e4d409bb34b71d6ec1c00f2a0..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/frequencyAPI.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from typing import List, Dict, Set
-from mcserver.app.models import Phenomenon, FrequencyItem
-from mcserver.app.services import NetworkService, CorpusService, AnnotationService
-
-
-def get(urn: str):
-    """ Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
-    fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn, is_csm=True)
-    # map the abbreviated values found by ANNIS to our own model
-    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
-    for fi in fa:
-        for i in range(len(fi.values)):
-            if fi.phenomena[i] in skip_set:
-                continue
-            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[fi.phenomena[i]]
-            fi.values[i] = next((x for x in value_map if fi.values[i] in value_map[x]), None)
-    return NetworkService.make_json_response([x.to_dict() for x in fa])
diff --git a/mc_backend/csm/app/api/subgraphAPI.py b/mc_backend/csm/app/api/subgraphAPI.py
deleted file mode 100644
index 2f7a5272c156d811db812a71e803bbea4f3a6a71..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/subgraphAPI.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import json
-from typing import Dict, List
-import flask
-from flask_restful import Resource
-from flask_restful.reqparse import RequestParser
-from mcserver.app.models import ExerciseData, GraphData, Solution, AnnisResponse, make_solution_element_from_salt_id
-from mcserver.app.services import CorpusService, AnnotationService, NetworkService
-
-
-class SubgraphAPI(Resource):
-    def __init__(self):
-        self.reqparse: RequestParser = NetworkService.base_request_parser.copy()
-        self.reqparse.add_argument("aqls", required=False, location="data", help="No AQLs provided", action="append")
-        self.reqparse.add_argument("ctx_left", type=str, required=False, default="", location="data",
-                                   help="No left context provided")
-        self.reqparse.add_argument("ctx_right", type=str, required=False, default="", location="data",
-                                   help="No right context provided")
-        self.reqparse.add_argument("node_ids", type=str, required=False, location="data", help="No node IDs provided")
-        self.reqparse.add_argument("urn", type=str, required=False, default="", location="data", help="No URN provided")
-        super(SubgraphAPI, self).__init__()
-
-    def get(self):
-        """ Returns subgraph data for a given CTS URN and node IDs. """
-        args: Dict = flask.request.args
-        aql: str = str(args['aqls'])
-        urn: str = args["urn"]
-        ctx_left: int = int(args["ctx_left"])
-        ctx_right: int = int(args["ctx_right"])
-        ar: AnnisResponse = CorpusService.get_subgraph(urn, aql, ctx_left, ctx_right, is_csm=True)
-        return NetworkService.make_json_response(ar.to_dict())
-
-    def post(self):
-        """ Returns subgraph data for a given CTS URN and AQL. """
-        # get request arguments
-        args: Dict = json.loads(flask.request.data.decode("utf-8"))
-        cts_urn: str = args["urn"]
-        aqls: List[str] = args["aqls"]
-        ctx_left: int = int(args["ctx_left"])
-        ctx_right: int = int(args["ctx_right"])
-        disk_urn: str = AnnotationService.get_disk_urn(cts_urn)
-        exercise_data_list: List[ExerciseData] = []
-        for aql in aqls:
-            node_ids: List[str] = CorpusService.find_matches(cts_urn, aql, is_csm=True)
-            for node_id in node_ids:
-                gd: GraphData = AnnotationService.get_single_subgraph(
-                    disk_urn, [node_id], ctx_left, ctx_right, is_csm=True)
-                exercise_data_list.append(ExerciseData(
-                    graph=gd, uri="", solutions=[Solution(target=make_solution_element_from_salt_id(node_id))]))
-        ret_val: List[dict] = [x.serialize() for x in exercise_data_list]
-        return NetworkService.make_json_response(ret_val)
diff --git a/mc_backend/csm/app/api/textcomplexityAPI.py b/mc_backend/csm/app/api/textcomplexityAPI.py
deleted file mode 100644
index 5cc80022391581d58e461232665fa46350295641..0000000000000000000000000000000000000000
--- a/mc_backend/csm/app/api/textcomplexityAPI.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import rapidjson as json
-from mcserver.app.models import AnnisResponse, TextComplexity
-from mcserver.app.services import NetworkService, CorpusService, TextComplexityService
-from openapi.openapi_server.models import TextComplexityForm
-
-
-def post(complexity_data: dict):
-    tcf: TextComplexityForm = TextComplexityForm.from_dict(complexity_data)
-    ar: AnnisResponse = AnnisResponse.from_dict(json.loads(tcf.annis_response)) if tcf.annis_response \
-        else CorpusService.get_corpus(tcf.urn, is_csm=True)
-    tc: TextComplexity = TextComplexityService.text_complexity(tcf.measure, tcf.urn, True, ar.graph_data)
-    return NetworkService.make_json_response(tc.to_dict())
diff --git a/mc_backend/csm/csm_api.yaml b/mc_backend/csm/csm_api.yaml
deleted file mode 100644
index 79dc31abce92eee69f1e7dc8e66314c947e17ebb..0000000000000000000000000000000000000000
--- a/mc_backend/csm/csm_api.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-openapi: "3.0.0"
-
-info:
-  title: Machina Callida Backend REST API (Corpus Storage Manager)
-  version: "1.0"
-servers:
-  - url: http://localhost:6555/mc/api/v1.0
-
-paths:
-  /frequency:
-    get:
-      summary: Returns results for a frequency query from ANNIS for a given CTS URN.
-      operationId: csm.app.api.frequencyAPI.get
-      responses:
-        200:
-          description: Frequency analysis, i.e. a list of frequency items.
-          content:
-            application/json:
-              schema:
-                type: array
-                description: List of items with frequency data for linguistic phenomena.
-                items:
-                  $ref: "../openapi_models.yaml#/components/schemas/FrequencyItem"
-      parameters:
-        - $ref: '../openapi_models.yaml#/components/parameters/UrnParam'
-  /textcomplexity:
-    post:
-      summary: Gives users measures of text complexity for a given text.
-      operationId: csm.app.api.textcomplexityAPI.post
-      responses:
-        200:
-          description: Text complexity measures for a given text.
-          content:
-            application/json:
-              schema:
-                $ref: '../openapi_models.yaml#/components/schemas/TextComplexity'
-      requestBody:
-        required: true
-        content:
-          application/x-www-form-urlencoded:
-            schema:
-              $ref: '../openapi_models.yaml#/components/schemas/TextComplexityForm'
diff --git a/mc_backend/csm/gunicorn_config.py b/mc_backend/csm/gunicorn_config.py
deleted file mode 100644
index 31f377d6f3e4701dadbb308e529ec110277165cc..0000000000000000000000000000000000000000
--- a/mc_backend/csm/gunicorn_config.py
+++ /dev/null
@@ -1,8 +0,0 @@
-"""Configuration for the gunicorn server"""
-from mcserver import Config
-
-bind = "{0}:{1}".format(Config.HOST_IP_CSM, Config.CORPUS_STORAGE_MANAGER_PORT)
-debug = False
-reload = True
-timeout = 3600
-workers = 1
diff --git a/mc_backend/mcserver/app/__init__.py b/mc_backend/mcserver/app/__init__.py
index 478a9c284a8037a0fe042c97e47c01949d17846e..d066e39d8122b25a36ad593a22178568f24f839e 100644
--- a/mc_backend/mcserver/app/__init__.py
+++ b/mc_backend/mcserver/app/__init__.py
@@ -7,7 +7,6 @@ from threading import Thread
 from time import strftime
 from typing import Type
 import connexion
-import flask
 import open_alchemy
 import prance
 from connexion import FlaskApp
@@ -15,6 +14,7 @@
 from flask import Flask, got_request_exception, request, Response, send_from_directory
 from flask_cors import CORS
 from flask_migrate import Migrate
 from flask_sqlalchemy import SQLAlchemy
+from graphannis.cs import CorpusStorageManager
 from open_alchemy import init_yaml
 from mcserver.config import Config
@@ -47,6 +47,7 @@ def create_app(cfg: Type[Config] = Config) -> Flask:
     # use local postgres database for migrations
     if len(sys.argv) > 2 and sys.argv[2] == Config.FLASK_MIGRATE:
         cfg.SQLALCHEMY_DATABASE_URI = Config.DATABASE_URL_LOCAL
+    Config.CORPUS_STORAGE_MANAGER = CorpusStorageManager(Config.GRAPH_DATABASE_DIR)
     app: Flask = init_app_common(cfg=cfg)
     from mcserver.app.services import bp as services_bp
     app.register_blueprint(services_bp)
@@ -68,19 +69,17 @@ def full_init(app: Flask, cfg: Type[Config] = Config) -> None:
     from mcserver.app.services.corpusService import CorpusService
     CorpusService.init_corpora()
     from mcserver.app.services import ExerciseService
-    ExerciseService.update_exercises(is_csm=True)
+    ExerciseService.update_exercises()
     if not cfg.TESTING:
         CorpusService.init_graphannis_logging()
         start_updater(app)
 
 
-def init_app_common(cfg: Type[Config] = Config, is_csm: bool = False) -> Flask:
+def init_app_common(cfg: Type[Config] = Config) -> Flask:
     """ Initializes common Flask parts, e.g. CORS, configuration, database, migrations and custom corpora."""
-    spec_dir: str = Config.CSM_DIRECTORY if is_csm else Config.MC_SERVER_DIRECTORY
     connexion_app: FlaskApp = connexion.FlaskApp(
-        __name__, port=(cfg.CORPUS_STORAGE_MANAGER_PORT if is_csm else cfg.HOST_PORT), specification_dir=spec_dir)
-    spec_path: str = Config.API_SPEC_CSM_FILE_PATH if is_csm else Config.API_SPEC_MCSERVER_FILE_PATH
-    parser = prance.ResolvingParser(spec_path, lazy=True, strict=False)  # str(Path(spec_path).absolute())
+        __name__, port=cfg.HOST_PORT, specification_dir=Config.MC_SERVER_DIRECTORY)
+    parser = prance.ResolvingParser(Config.API_SPEC_MCSERVER_FILE_PATH, lazy=True, strict=False)
     parser.parse()
     connexion_app.add_api(parser.specification)
     apply_event_handlers(connexion_app)
@@ -91,15 +90,13 @@
     app.app_context().push()
     db.init_app(app)
     migrate.init_app(app, db)
-    if is_csm or cfg.TESTING:
-        db.create_all()
-    if is_csm:
-        from mcserver.app.services.databaseService import DatabaseService
-        DatabaseService.init_db_alembic()
+    db.create_all()
+    from mcserver.app.services.databaseService import DatabaseService
+    DatabaseService.init_db_alembic()
     from mcserver.app.services.textService import TextService
     TextService.init_proper_nouns_list()
     TextService.init_stop_words_latin()
-    if is_csm:
+    if not Config.TESTING:
         full_init(app, cfg)
     return app
@@ -118,7 +115,7 @@ def init_logging(app: Flask, log_file_path: str):
     app.logger.warning(f"Accessing database at: {database_uri}")
 
 
-def log_exception(sender_app: Flask, exception, **extra):
+def log_exception(sender_app: Flask, exception: Exception, **extra):
     """Logs errors that occur while the Flask app is working.
 
     Arguments:
@@ -126,7 +123,7 @@
         exception -- the exception to be logged
         **extra -- any additional arguments
     """
-    sender_app.logger.info(f"ERROR for {flask.request.url}")
+    sender_app.logger.info(f"ERROR for {request.url}")
 
 
 def start_updater(app: Flask) -> Thread:
diff --git a/mc_backend/mcserver/app/api/exerciseAPI.py b/mc_backend/mcserver/app/api/exerciseAPI.py
index 358f5c827802cf4c48bfb0660ccead0d0a4c814e..2aad37861965bd4efbf20137417dfcc314dff89e 100644
--- a/mc_backend/mcserver/app/api/exerciseAPI.py
+++ b/mc_backend/mcserver/app/api/exerciseAPI.py
@@ -4,6 +4,7 @@ import connexion
 import rapidjson as json
 from typing import List, Dict, Union
 import requests
+from conllu import TokenList
 from connexion.lifecycle import ConnexionResponse
 from flask import Response
 from mcserver.app import db
@@ -33,7 +34,7 @@
     exercise: TExercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True)
     if not exercise:
         return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
-    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn, is_csm=False)
+    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn)
     if not ar.graph_data.nodes:
         return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
     exercise.last_access_time = datetime.utcnow().timestamp()
@@ -47,17 +48,10 @@
 
 
 def get_graph_data(title: str, conll_string_or_urn: str, aqls: List[str], exercise_type: ExerciseType,
-                   search_phenomena: List[Phenomenon]):
+                   search_phenomena: List[Phenomenon]) -> dict:
     """Sends annotated text data or a URN to the Corpus Storage Manager in order to get a graph."""
-    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}"
-    data: str = json.dumps(
-        dict(title=title, annotations=conll_string_or_urn, aqls=aqls, exercise_type=exercise_type.name,
-             search_phenomena=search_phenomena))
-    response: requests.Response = requests.post(url, data=data)
-    try:
-        return json.loads(response.text)
-    except ValueError:
-        raise
+    conll: List[TokenList] = CorpusService.get_annotations_from_string(conll_string_or_urn)
+    return CorpusService.process_corpus_data(title, conll, aqls, exercise_type, search_phenomena)
 
 
 def make_new_exercise(conll: str, correct_feedback: str, exercise_type: str, general_feedback: str,
@@ -97,7 +91,7 @@
     solutions: List[Solution] = adjust_solutions(exercise_data=exercise_data, solutions=solutions,
                                                  exercise_type=exercise_type)
     quiz_solutions: str = json.dumps([x.to_dict() for x in solutions])
-    tc: TextComplexity = TextComplexityService.text_complexity(TextComplexityMeasure.all.name, urn, False,
+    tc: TextComplexity = TextComplexityService.text_complexity(TextComplexityMeasure.all.name, urn,
                                                                exercise_data.graph)
     new_exercise: Exercise = ExerciseMC.from_dict(
         conll=conll, correct_feedback=correct_feedback, eid=xml_guid, exercise_type=exercise_type,
@@ -128,7 +122,7 @@
                           search_values_list]
     # if there is custom text instead of a URN, immediately annotate it
     conll_string_or_urn: str = ef.urn if CorpusService.is_urn(ef.urn) else AnnotationService.get_udpipe(
-        CorpusService.get_raw_text(ef.urn, False))
+        CorpusService.get_raw_text(ef.urn))
     try:
         # construct graph from CONLL data
         response: dict = get_graph_data(title=ef.urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
diff --git a/mc_backend/mcserver/app/api/frequencyAPI.py b/mc_backend/mcserver/app/api/frequencyAPI.py
index a9dd4de9e3f3d67c9d63114e384319d5aa8ba078..c082e78a8182fb543ba91787ec475fe41cdeb7fe 100644
--- a/mc_backend/mcserver/app/api/frequencyAPI.py
+++ b/mc_backend/mcserver/app/api/frequencyAPI.py
@@ -1,12 +1,17 @@
-import requests
-import rapidjson as json
-from mcserver import Config
-from mcserver.app.services import NetworkService
+from typing import List, Set, Dict
+from mcserver.app.services import NetworkService, CorpusService, AnnotationService
+from openapi.openapi_server.models import FrequencyItem, Phenomenon
 
 
 def get(urn: str):
     """ Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
-    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" + \
-               Config.SERVER_URI_FREQUENCY
-    response: requests.Response = requests.get(url, params=dict(urn=urn))
-    return NetworkService.make_json_response(json.loads(response.text))
+    fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn)
+    # map the abbreviated values found by ANNIS to our own model
+    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
+    for fi in fa:
+        for i in range(len(fi.values)):
+            if fi.phenomena[i] in skip_set:
+                continue
+            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[fi.phenomena[i]]
+            fi.values[i] = next((x for x in value_map if fi.values[i] in value_map[x]), None)
+    return NetworkService.make_json_response([x.to_dict() for x in fa])
diff --git a/mc_backend/mcserver/app/api/kwicAPI.py b/mc_backend/mcserver/app/api/kwicAPI.py
index ff2bfdde4077a5b31a5436303e58269578c378cf..c09b3d4ace9e4e50601fd9ca3e4ccaab50c55d2c 100644
--- a/mc_backend/mcserver/app/api/kwicAPI.py
+++ b/mc_backend/mcserver/app/api/kwicAPI.py
@@ -6,15 +6,13 @@ from collections import OrderedDict
 from sys import platform
 from tempfile import mkstemp
 from typing import List, Dict
-
-import requests
 from bs4 import BeautifulSoup, ResultSet, Tag
 from conllu import TokenList
 from flask import Response
-from mcserver.app.models import ExerciseType, ExerciseData, LinkMC, NodeMC
-from mcserver.app.services import AnnotationService, NetworkService
+from mcserver.app.models import ExerciseType, ExerciseData, LinkMC, NodeMC, make_solution_element_from_salt_id
+from mcserver.app.services import AnnotationService, NetworkService, CorpusService
 from mcserver.config import Config
-from openapi.openapi_server.models import KwicForm
+from openapi.openapi_server.models import KwicForm, GraphData, Solution
 
 
 def post(kwic_data: dict) -> Response:
@@ -23,12 +21,15 @@
     kwic_form: KwicForm = KwicForm.from_dict(kwic_data)
     search_values_list: List[str] = json.loads(kwic_form.search_values)
     aqls: List[str] = AnnotationService.map_search_values_to_aql(search_values_list, ExerciseType.kwic)
-    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_CSM_SUBGRAPH}"
-    data: str = json.dumps(
-        dict(urn=kwic_data["urn"], aqls=aqls, ctx_left=str(kwic_form.ctx_left), ctx_right=str(kwic_form.ctx_right)))
-    response: requests.Response = requests.post(url, data=data)
-    response_content: List[dict] = json.loads(response.text)
-    exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=x) for x in response_content]
+    disk_urn: str = AnnotationService.get_disk_urn(kwic_form.urn)
+    exercise_data_list: List[ExerciseData] = []
+    for aql in aqls:
+        node_ids: List[str] = CorpusService.find_matches(kwic_form.urn, aql)
+        for node_id in node_ids:
+            gd: GraphData = AnnotationService.get_single_subgraph(
+                disk_urn, [node_id], kwic_form.ctx_left, kwic_form.ctx_right)
+            exercise_data_list.append(ExerciseData(
+                graph=gd, uri="", solutions=[Solution(target=make_solution_element_from_salt_id(node_id))]))
     ret_val: str = ""
     for i in range(len(exercise_data_list)):
         ret_val += handle_exercise_data(exercise_data_list[i], kwic_form.ctx_left, kwic_form.ctx_right)
diff --git a/mc_backend/mcserver/app/api/rawTextAPI.py b/mc_backend/mcserver/app/api/rawTextAPI.py
index 7c038be879976de1d5d62a0a9a3a113967eee02a..2c75d4f44650b87ffe7425f8788b695298a91fdb 100644
--- a/mc_backend/mcserver/app/api/rawTextAPI.py
+++ b/mc_backend/mcserver/app/api/rawTextAPI.py
@@ -11,9 +11,9 @@ from mcserver.app.services import CorpusService, NetworkService, TextComplexityService
 
 def get(urn: str) -> Union[Response, ConnexionResponse]:
     """Provides the raw text for a requested text passage."""
-    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
+    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn)
     if not ar.graph_data.nodes:
         return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
-    ar.text_complexity = TextComplexityService.text_complexity(TextComplexityMeasure.all.name, urn, False,
+    ar.text_complexity = TextComplexityService.text_complexity(TextComplexityMeasure.all.name, urn,
                                                                ar.graph_data).to_dict()
     return NetworkService.make_json_response(ar.to_dict())
diff --git a/mc_backend/mcserver/app/api/textcomplexityAPI.py b/mc_backend/mcserver/app/api/textcomplexityAPI.py
index 59f6211e0c5a91fd8a4c6e0091e4f1b179e81f7b..1128c65540be6bbc9e8c0b1366ed80bac2429413 100644
--- a/mc_backend/mcserver/app/api/textcomplexityAPI.py
+++ b/mc_backend/mcserver/app/api/textcomplexityAPI.py
@@ -4,6 +4,6 @@ from mcserver.app.services import NetworkService, CorpusService, TextComplexityService
 
 def get(measure: str, urn: str):
     """Gives users measures of text complexity for a given text."""
-    ar: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
-    tc: TextComplexity = TextComplexityService.text_complexity(measure, urn, False, ar.graph_data)
+    ar: AnnisResponse = CorpusService.get_corpus(urn)
+    tc: TextComplexity = TextComplexityService.text_complexity(measure, urn, ar.graph_data)
     return NetworkService.make_json_response(tc.to_dict())
diff --git a/mc_backend/mcserver/app/api/vocabularyAPI.py b/mc_backend/mcserver/app/api/vocabularyAPI.py
index 001eca3e15837488332e02fa7040599a209498ae..19b43b914767c9f0704fc3cc6ea3e26c4a12c78c 100644
--- a/mc_backend/mcserver/app/api/vocabularyAPI.py
+++ b/mc_backend/mcserver/app/api/vocabularyAPI.py
@@ -45,7 +45,7 @@
     # punctuation should count as a match because we don't want to count this as part of the vocabulary
     for char in string.punctuation:
         vocabulary_set.add(char)
-    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn, is_csm=False)
+    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn)
     sentences: List[Sentence] = check_vocabulary(ar.graph_data, vocabulary_set)
     return NetworkService.make_json_response([x.to_dict() for x in sentences])
 
@@ -58,12 +58,12 @@
     # punctuation should count as a match because we don't want to count this as part of the vocabulary
     for char in string.punctuation:
         vocabulary_set.add(char)
-    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=vf.query_urn, is_csm=False)
+    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=vf.query_urn)
     for node in ar.graph_data.nodes:
         if not TextService.check_lemma_in_vocabulary(target_lemma=node.udep_lemma, vocabulary_set=vocabulary_set):
             node.is_oov = True
     ar: AnnisResponse = AnnisResponse(
         solutions=[], uri="", exercise_id="", graph_data=ar.graph_data)
     ar.text_complexity = TextComplexityService.text_complexity(
-        TextComplexityMeasure.all.name, vf.query_urn, False, ar.graph_data).to_dict()
+        TextComplexityMeasure.all.name, vf.query_urn, ar.graph_data).to_dict()
     return NetworkService.make_json_response(ar.to_dict())
diff --git a/mc_backend/mcserver/app/services/annotationService.py b/mc_backend/mcserver/app/services/annotationService.py
index 979c44a50711e4946be207aa205bbd04255ad6c0..9f80eef8cbbc811be86b513b94dbfb80fcf842ce 100644
--- a/mc_backend/mcserver/app/services/annotationService.py
+++ b/mc_backend/mcserver/app/services/annotationService.py
@@ -171,11 +171,9 @@
         return int(node.id.split("#")[-1].split("tok")[0].replace("sent", ""))
 
     @staticmethod
-    def get_single_subgraph(disk_urn: str, node_ids: List[str], ctx_left: int = 5, ctx_right: int = 5,
-                            is_csm: bool = False) -> GraphData:
+    def get_single_subgraph(disk_urn: str, node_ids: List[str], ctx_left: int = 5, ctx_right: int = 5) \
+            -> GraphData:
         """ Retrieves a single subgraph for a given URN and node IDs. """
-        if not is_csm:
-            raise NotImplementedError
         mdg: MultiDiGraph = Config.CORPUS_STORAGE_MANAGER.subgraph(corpus_name=disk_urn, node_ids=node_ids,
                                                                    ctx_left=ctx_left, ctx_right=ctx_right)
         graph_data_raw: dict = json_graph.node_link_data(mdg)
@@ -211,10 +209,10 @@
         return "@" in urn
 
     @staticmethod
-    def map_conll_to_graph(corpus_name: str, conll: List[TokenList], cs: CorpusStorageManager, file_name: str):
+    def map_conll_to_graph(corpus_name: str, conll: List[TokenList], file_name: str):
         """ Saves an annotated corpus in CONLL format to the ANNIS corpus storage. """
         # delete any existing corpus with this name
-        cs.delete_corpus(file_name)
+        Config.CORPUS_STORAGE_MANAGER.delete_corpus(file_name)
         # currently there is only one document because texts are their own corpus
         doc_name: str = 'doc1'
         with GraphUpdate() as g:
@@ -226,7 +224,7 @@
             # the document is part of the corpus
             g.add_edge(doc_path, corpus_name, 'annis', 'PartOf', '')
             AnnotationService.add_annotations_to_graph(conll, g, doc_name, doc_path)
-            cs.apply_update(file_name, g)
+            Config.CORPUS_STORAGE_MANAGER.apply_update(file_name, g)
 
     @staticmethod
     def map_graph_data(graph_data_raw: dict) -> GraphData:
diff --git a/mc_backend/mcserver/app/services/corpusService.py b/mc_backend/mcserver/app/services/corpusService.py
index a20492a43a2e5e82c267cdf1efb6a4022cccc25e..c1030bd3ff76f6cc2f5fdbf6456ce1d61be8e1b2 100644
--- a/mc_backend/mcserver/app/services/corpusService.py
+++ b/mc_backend/mcserver/app/services/corpusService.py
@@ -74,25 +74,19 @@
         app.logger.info("Corpus update completed.")
 
     @staticmethod
-    def find_matches(urn: str, aql: str, is_csm: bool = False) -> List[str]:
+    def find_matches(urn: str, aql: str) -> List[str]:
         """ Finds matches for a given URN and AQL and returns the corresponding node IDs. """
-        if is_csm:
-            disk_urn: str = AnnotationService.get_disk_urn(urn)
-            result_list: List[List[str]]
-            try:
-                result_list = Config.CORPUS_STORAGE_MANAGER.find(corpus_name=disk_urn, query=aql, limit=sys.maxsize,
-                                                                 order=ResultOrder.NotSorted)
-            except NoSuchCorpus:
-                CorpusService.get_corpus(urn, True)
-                result_list = Config.CORPUS_STORAGE_MANAGER.find(corpus_name=disk_urn, query=aql, limit=sys.maxsize,
-                                                                 order=ResultOrder.NotSorted)
-            # extract the SALT ID for each match
-            return [y for x in node_name_from_match(result_list) for y in x]
-        else:
-            url: str = Config.INTERNET_PROTOCOL + f"{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" + \
-                       Config.SERVER_URI_ANNIS_FIND
-            response: requests.Response = requests.get(url, params=dict(urn=urn, aql=aql))
-            return json.loads(response.text)
+        disk_urn: str = AnnotationService.get_disk_urn(urn)
+        result_list: List[List[str]]
+        try:
+            result_list = Config.CORPUS_STORAGE_MANAGER.find(
+                corpus_name=disk_urn, query=aql, limit=sys.maxsize, order=ResultOrder.NotSorted)
+        except NoSuchCorpus:
+            CorpusService.get_corpus(urn)
+            result_list = Config.CORPUS_STORAGE_MANAGER.find(
+                corpus_name=disk_urn, query=aql, limit=sys.maxsize, order=ResultOrder.NotSorted)
+        # extract the SALT ID for each match
+        return [y for x in node_name_from_match(result_list) for y in x]
 
     @staticmethod
     def get_annotations_from_string(annotations_or_urn: str) -> List[TokenList]:
@@ -105,55 +99,43 @@
             conll = CustomCorpusService.get_custom_corpus_annotations(annotations_or_urn)
         else:
             if CorpusService.is_urn(annotations_or_urn):
-                raw_text: str = CorpusService.get_raw_text(urn=annotations_or_urn, is_csm=True)
+                raw_text: str = CorpusService.get_raw_text(urn=annotations_or_urn)
                 annotations_or_urn = AnnotationService.get_udpipe(raw_text)
             # parse CONLL and add root dependencies as separate node annotations
             conll = AnnotationService.parse_conll_string(annotations_or_urn)
         return conll
 
     @staticmethod
-    def get_corpus(cts_urn: str, is_csm: bool) -> AnnisResponse:
+    def get_corpus(cts_urn: str) -> AnnisResponse:
         """ Loads the text for a standard corpus from the CTS API or cache. """
-        if is_csm:
-            # get graph data for further processing
-            graph_data_raw: dict = CorpusService.get_graph_data(cts_urn)
-            if not graph_data_raw:
-                return AnnisResponse(graph_data=GraphData(links=[], nodes=[]))
-            graph_data: GraphData = AnnotationService.map_graph_data(graph_data_raw)
-            ar: AnnisResponse = AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=graph_data)
-            return ar
-        else:
-            # there is actually no text, only a URN, so we need to get it ourselves
-            url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}/"
-            response: requests.Response = requests.get(url, params=dict(urn=cts_urn))
-            return AnnisResponse.from_dict(json.loads(response.text))
+        # get graph data for further processing
+        graph_data_raw: dict = CorpusService.get_graph_data(cts_urn)
+        if not graph_data_raw:
+            return AnnisResponse(graph_data=GraphData(links=[], nodes=[]))
+        graph_data: GraphData = AnnotationService.map_graph_data(graph_data_raw)
+        ar: AnnisResponse = AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=graph_data)
+        return ar
 
     @staticmethod
-    def get_frequency_analysis(urn: str, is_csm: bool) -> List[FrequencyItem]:
+    def get_frequency_analysis(urn: str) -> List[FrequencyItem]:
         """ Collects frequency statistics for various combinations of linguistic annotations in a corpus. """
-        if is_csm:
-            ar: AnnisResponse = CorpusService.get_corpus(urn, is_csm)
-            search_phenomena: List[List[Phenomenon]] = []
-            for head_phenomenon in list(x for x in Phenomenon.__dict__.keys() if x.isupper()):
-                for base_phenomenon in list(x for x in Phenomenon.__dict__.keys() if x.isupper()):
-                    search_phenomena.append([Phenomenon().__getattribute__(head_phenomenon),
-                                             Phenomenon().__getattribute__(base_phenomenon)])
-            disk_urn: str = AnnotationService.get_disk_urn(urn)
-            fa: List[FrequencyItem] = []
-            for search_phenomenon in search_phenomena:
-                if Phenomenon.DEPENDENCY in search_phenomenon:
-                    continue
-                elif search_phenomenon[0] == Phenomenon.FEATS:
-                    fa += FrequencyService.add_case_frequencies(disk_urn, search_phenomenon)
-                elif search_phenomenon[0] in [Phenomenon.LEMMA, Phenomenon.UPOSTAG]:
-                    fa += FrequencyService.add_generic_frequencies(disk_urn, search_phenomenon)
-            FrequencyService.add_dependency_frequencies(ar.graph_data, fa)
-            return FrequencyService.extract_case_values(fa)
-        else:
-            url: str = Config.INTERNET_PROTOCOL + f"{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" + \
-                       Config.SERVER_URI_FREQUENCY
-            response: requests.Response = requests.get(url, params=dict(urn=urn))
-            return [FrequencyItem.from_dict(x) for x in json.loads(response.text)]
+        ar: AnnisResponse = CorpusService.get_corpus(urn)
+        search_phenomena: List[List[Phenomenon]] = []
+        for head_phenomenon in list(x for x in Phenomenon.__dict__.keys() if x.isupper()):
+            for base_phenomenon in list(x for x in Phenomenon.__dict__.keys() if x.isupper()):
+                search_phenomena.append([Phenomenon().__getattribute__(head_phenomenon),
+                                         Phenomenon().__getattribute__(base_phenomenon)])
+        disk_urn: str = AnnotationService.get_disk_urn(urn)
+        fa: List[FrequencyItem] = []
+        for search_phenomenon in search_phenomena:
+            if Phenomenon.DEPENDENCY in search_phenomenon:
+                continue
+            elif search_phenomenon[0] == Phenomenon.FEATS:
+                fa += FrequencyService.add_case_frequencies(disk_urn, search_phenomenon)
+            elif search_phenomenon[0] in [Phenomenon.LEMMA, Phenomenon.UPOSTAG]:
+                fa += FrequencyService.add_generic_frequencies(disk_urn, search_phenomenon)
+        FrequencyService.add_dependency_frequencies(ar.graph_data, fa)
+        return FrequencyService.extract_case_values(fa)
 
     @staticmethod
     def get_graph(cts_urn: str) -> MultiDiGraph:
@@ -172,7 +154,7 @@
             except (NoSuchCorpus, GraphANNISException):
                 annotations = CustomCorpusService.get_treebank_annotations(cts_urn)
             AnnotationService.map_conll_to_graph(corpus_name=cts_urn, conll=annotations,
-                                                 cs=Config.CORPUS_STORAGE_MANAGER, file_name=cts_urn_disk)
+                                                 file_name=cts_urn_disk)
             mdg = Config.CORPUS_STORAGE_MANAGER.subcorpus_graph(corpus_name=cts_urn_disk, document_ids=[doc_id])
             return mdg
         try:
@@ -190,8 +172,7 @@
         annotations = AnnotationService.parse_conll_string(annotations_conll)
         AnnotationService.add_urn_to_sentences(text_list, annotations)
         # each document gets its own corpus
-        AnnotationService.map_conll_to_graph(cts_urn_raw, annotations, Config.CORPUS_STORAGE_MANAGER,
-                                             cts_urn_raw_disk)
+        AnnotationService.map_conll_to_graph(cts_urn_raw, annotations, cts_urn_raw_disk)
         mdg = Config.CORPUS_STORAGE_MANAGER.subcorpus_graph(cts_urn_raw_disk, [doc_id])
         if AnnotationService.has_urn_sentence_range(cts_urn):
             return CorpusService.get_sentence_range(mdg=mdg, cts_urn=cts_urn, file_name=cts_urn_disk)
@@ -211,7 +192,7 @@
         # model matches as the basis for solutions so we can process them more easily later on
         matches: List[Solution] = []
        for aql in aqls:
-            node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=True)
+            node_ids: List[str] = CorpusService.find_matches(urn, aql)
             if len(search_phenomena) == 1:
                 # it's cloze or markWords; the solutions only have a target, no explicit value
                 if search_phenomena[0] == Phenomenon.DEPENDENCY:
@@ -238,9 +219,9 @@
         return matches
 
     @staticmethod
-    def get_raw_text(urn: str, is_csm: bool):
+    def get_raw_text(urn: str):
         """ Retrieves the raw text for a corpus. """
-        ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=is_csm)
+        ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn)
         text_raw = " ".join(x.annis_tok for x in ar.graph_data.nodes)
         # remove the spaces before punctuation because, otherwise, the parser won't work correctly
         return TextService.strip_whitespace(text_raw)
@@ -266,7 +247,7 @@
                 sent.metadata["urn"] = node_urns[tok_count]
             tok_count += len(sent.tokens)
         # each document gets its own corpus
-        AnnotationService.map_conll_to_graph(cts_urn, annotations, Config.CORPUS_STORAGE_MANAGER, file_name)
+        AnnotationService.map_conll_to_graph(cts_urn, annotations, file_name)
         return Config.CORPUS_STORAGE_MANAGER.subcorpus_graph(file_name, [cts_urn + '/doc1'])
 
     @staticmethod
@@ -289,19 +270,12 @@
         return disk_reff
 
     @staticmethod
-    def get_subgraph(urn: str, aql: str, ctx_left: int = 5, ctx_right: int = 5, is_csm: bool = False) -> AnnisResponse:
+    def get_subgraph(urn: str, aql: str, ctx_left: int = 5, ctx_right: int = 5) -> AnnisResponse:
         """ Retrieves subgraph data for a given URN and node IDs. """
         disk_urn: str = AnnotationService.get_disk_urn(urn)
-        if is_csm:
-            node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
-            gd: GraphData = AnnotationService.get_single_subgraph(disk_urn, node_ids, ctx_left, ctx_right, is_csm)
-            return AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=gd)
-        else:
-            url: str = Config.INTERNET_PROTOCOL + f"{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}" + \
-                       Config.SERVER_URI_CSM_SUBGRAPH
-            response: requests.Response = requests.get(url, params=dict(urn=disk_urn, aqls=aql,
-                                                                        ctx_left=ctx_left, ctx_right=ctx_right))
-            return AnnisResponse.from_dict(json.loads(response.text))
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
+        gd: GraphData = AnnotationService.get_single_subgraph(disk_urn, node_ids, ctx_left, ctx_right)
+        return AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=gd)
 
     @staticmethod
     def init_corpora() -> None:
diff --git a/mc_backend/mcserver/app/services/exerciseService.py b/mc_backend/mcserver/app/services/exerciseService.py
index fada64fb314488cb9e2187e24c2433f4543caeae..acf41a3eb99599753dd0c11dea1380d628613e89 100644
--- a/mc_backend/mcserver/app/services/exerciseService.py
+++ b/mc_backend/mcserver/app/services/exerciseService.py
@@ -21,7 +21,7 @@
         return ExerciseData(graph=graph_data, solutions=solutions, uri=xml_url)
 
     @staticmethod
-    def update_exercises(is_csm: bool) -> None:
+    def update_exercises() -> None:
         """Deletes old exercises."""
         if DatabaseService.has_table(Config.DATABASE_TABLE_EXERCISE):
             exercises: List[Exercise] = DatabaseService.query(Exercise)
@@ -36,8 +36,8 @@
                     DatabaseService.commit()
                 # manually add text complexity measures for old exercises
                 elif not exercise.text_complexity:
-                    ar: AnnisResponse = CorpusService.get_corpus(exercise.urn, is_csm=is_csm)
+                    ar: AnnisResponse = CorpusService.get_corpus(exercise.urn)
                     tc: TextComplexity = TextComplexityService.text_complexity(
-                        TextComplexityMeasure.all.name, exercise.urn, is_csm, ar.graph_data)
+                        TextComplexityMeasure.all.name, exercise.urn, ar.graph_data)
                     exercise.text_complexity = tc.all
                     DatabaseService.commit()
diff --git a/mc_backend/mcserver/app/services/frequencyService.py b/mc_backend/mcserver/app/services/frequencyService.py
index 768f03330eafcde1acd2d57aca957c27f3657c92..a5c4e8b2526b2b36cd595db98173e0f20e2e86b5 100644
--- a/mc_backend/mcserver/app/services/frequencyService.py
+++ b/mc_backend/mcserver/app/services/frequencyService.py
@@ -1,4 +1,4 @@
-from typing import List, Dict
+from typing import List, Dict, Any
 
 from graphannis.cs import FrequencyTableEntry
 
@@ -30,10 +30,10 @@
             List[FrequencyItem]:
         """Adds frequency information for specific case annotations in a corpus."""
         definition += search_phenomena[1]
-        result: List[FrequencyTableEntry] = Config.CORPUS_STORAGE_MANAGER.frequency(
+        results: List[FrequencyTableEntry] = Config.CORPUS_STORAGE_MANAGER.frequency(
             corpus_name=urn, query=aql, definition=definition)
-        return [FrequencyItem(x.count, search_phenomena, [x.values[0], x.values[1]]) for x in
-                result]
+        return [FrequencyItem(int(str(x.count)), search_phenomena, [x.values[0], x.values[1]]) for x in
+                results]
 
     @staticmethod
     def add_dependency_frequencies(graph_data: GraphData, fa: List[FrequencyItem]):
diff --git a/mc_backend/mcserver/app/services/textComplexityService.py b/mc_backend/mcserver/app/services/textComplexityService.py
index 7f538652db7b8431ef6b4d5aef44b33f58adeed2..0dc411871e11e0ee96184070a2e5c376877943e3 100644
--- a/mc_backend/mcserver/app/services/textComplexityService.py
+++ b/mc_backend/mcserver/app/services/textComplexityService.py
@@ -1,11 +1,6 @@
-import json
 from typing import Dict, List, Set
-
-import requests
-from mcserver import Config
-from mcserver.app.models import GraphData, TextComplexity, TextComplexityMeasure, AnnisResponse
+from mcserver.app.models import GraphData, TextComplexity, TextComplexityMeasure
 from mcserver.app.services import TextService, AnnotationService, CorpusService
-from openapi.openapi_server.models import TextComplexityForm
 
 
 class TextComplexityService:
@@ -13,14 +8,14 @@
     current_graph_data: GraphData
 
     @staticmethod
-    def average_sentence_length(urn: str, is_csm: bool) -> float:
+    def average_sentence_length(urn: str) -> float:
         """ Gives back the average sentence length. """
-        words: int = TextComplexityService.how_many_words(urn, is_csm)
-        sentences: int = TextComplexityService.how_many_sentences(urn, is_csm)
+        words: int = TextComplexityService.how_many_words(urn)
+        sentences: int = TextComplexityService.how_many_sentences(urn)
         return words / sentences
 
     @staticmethod
-    def average_word_length(urn: str, is_csm: bool) -> float:
+    def average_word_length(urn: str) -> float:
         """Gives back the mean number of characters for word."""
         tok_lengths: List[int] = [len(x.annis_tok) for x in TextComplexityService.current_graph_data.nodes]
         return sum(tok_lengths) / len(tok_lengths)
@@ -83,88 +78,88 @@
         return set(x.annis_tok for x in TextComplexityService.current_graph_data.nodes)
 
     @staticmethod
-    def how_many_ablativi_absoluti(urn: str, is_csm: bool) -> int:
+    def how_many_ablativi_absoluti(urn: str) -> int:
         """ Gives back the number of ablativi absoluti in the text. """
         aql: str = "tok ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Abl.*/"
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         return round(len(node_ids) / 2)
 
     @staticmethod
-    def how_many_gerunds(urn: str, is_csm: bool) -> int:
+    def how_many_gerunds(urn: str) -> int:
         """ Gives back the number of gerunds in the text. """
         aql: str = "feats=/.*VerbForm=Ger.*/"
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         # TODO: gerundivo (VerbForm=Gdv.*)
         return len(node_ids)
 
     @staticmethod
-    def how_many_infinitive_constructions(urn: str, is_csm: bool) -> int:
+    def how_many_infinitive_constructions(urn: str) -> int:
         """ Gives back the number of infinitive constructions (AcI, NcI) in the text. """
         # get Nominativus cum Infinitivo
         aql: str = 'feats=/.*Inf.*/ ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Acc.*/'
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         # get Accusativus cum Infinitivo
         aql = 'feats=/.*Acc.*/ ->dep[deprel=/(xcomp|ccomp)/] feats=/.*Inf.*/'
-        node_ids += CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids += CorpusService.find_matches(urn, aql)
         return round(len(node_ids) / 2)
 
     @staticmethod
-    def how_many_main_clauses(urn: str, is_csm: bool) -> int:
+    def how_many_main_clauses(urn: str) -> int:
         """ Gives back how many clauses are in the text. """
         # TODO: ellipsis not counted
         aql: str = "deps"
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         return len(node_ids)
 
     @staticmethod
-    def how_many_participles(urn: str, is_csm: bool) -> int:
+    def how_many_participles(urn: str) -> int:
         """Gives back how many participles are in the text"""
         aql: str = "feats=/.*VerbForm=Part.*/"
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         return len(node_ids)
 
     @staticmethod
-    def how_many_pos(urn: str, is_csm: bool) -> int:
+    def how_many_pos(urn: str) -> int:
         """ Gives back how many different parts of speech are in the text. """
         pos_list: List[str] = [x.udep_upostag for x in TextComplexityService.current_graph_data.nodes]
         # TODO: visualize pos + pos density
         return len(set(pos_list))
 
     @staticmethod
-    def how_many_punctuation(urn: str, is_csm: bool) -> int:
+    def how_many_punctuation(urn: str) -> int:
         """ Gives back how many different parts of speech are in the text. """
         return len([x for x in TextComplexityService.current_graph_data.nodes if x.udep_upostag == "PUNCT"])
 
     @staticmethod
-    def how_many_sentences(urn: str, is_csm: bool) -> int:
+    def how_many_sentences(urn: str) -> int:
         """Gives back the number of sentences in the text"""
         sentences_ids: List[int] = [AnnotationService.get_sentence_id(node) for node in
                                     TextComplexityService.current_graph_data.nodes]
         return len(set(sentences_ids))
 
     @staticmethod
-    def how_many_sub_clauses(urn: str, is_csm: bool) -> int:
+    def how_many_sub_clauses(urn: str) -> int:
         """Gives back the number of subordinate clauses in the text. """
         aql: str = 'tok ->dep[deprel=/(acl|advcl|ccomp|xcomp)/] upostag="VERB"'
-        node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
+        node_ids: List[str] = CorpusService.find_matches(urn, aql)
         # TODO: degree of sub clauses; ellipsis not counted
         return round(len(node_ids) / 2)
 
     @staticmethod
-    def how_many_types(urn: str, is_csm: bool) -> int:
+    def how_many_types(urn: str) -> int:
         """ Gives back the numbers of types. """
         types: Set[str] = TextComplexityService.get_types()
         return len(types)
 
     @staticmethod
-    def how_many_words(urn: str, is_csm: bool) -> int:
+    def how_many_words(urn: str) -> int:
         """ Gives back the number of words in the text. """
         return len(TextComplexityService.current_graph_data.nodes)
 
     @staticmethod
-    def lexical_density(urn: str, is_csm: bool) -> float:
+    def lexical_density(urn: str) -> float:
         """ Gives back the lexical density of the text. """
-        token_count: int = TextComplexityService.how_many_words(urn, is_csm)
+        token_count: int = TextComplexityService.how_many_words(urn)
         types: Set[str] = TextComplexityService.get_types()
         content_words: Set[str] = set()
         for word in types:
@@ -173,24 +168,15 @@
         return len(content_words) / token_count
 
     @staticmethod
-    def text_complexity(measure: str, urn: str, is_csm: bool, gd: GraphData) -> TextComplexity:
+    def text_complexity(measure: str, urn: str, gd: GraphData) -> TextComplexity:
         """ Defines the text complexity according to the kind of measure requested. """
-        if is_csm:
-            measure_map: Dict[str, callable] = TextComplexityService.get_measure_map()
-            TextComplexityService.current_graph_data = gd
-            tc: TextComplexity = TextComplexity()
-            if measure == TextComplexityMeasure.all.name:
-                for key in measure_map:
-                    tc.__setattr__(key, round(measure_map[key](urn, is_csm), 2))
-                tc.all = TextComplexityService.calculate_overall_complexity(tc)
-            else:
-                tc.__setattr__(measure, round(measure_map[measure](urn, is_csm), 2))
-            return tc
+        measure_map: Dict[str, callable] = TextComplexityService.get_measure_map()
+        TextComplexityService.current_graph_data = gd
+        tc: TextComplexity = TextComplexity()
+        if measure == TextComplexityMeasure.all.name:
+            for key in measure_map:
+                tc.__setattr__(key, round(measure_map[key](urn), 2))
+            tc.all = TextComplexityService.calculate_overall_complexity(tc)
         else:
-            url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:" + \
-                       f"{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_TEXT_COMPLEXITY}"
-            ar: AnnisResponse = AnnisResponse(graph_data=gd)
-            tcf: TextComplexityForm = TextComplexityForm(urn=urn, measure=TextComplexityMeasure.all.name,
-                                                         annis_response=json.dumps(ar.to_dict()))
-            response: requests.Response = requests.post(url, data=tcf.to_dict())
-            return TextComplexity.from_dict(json.loads(response.text))
+            tc.__setattr__(measure, round(measure_map[measure](urn), 2))
+        return tc
diff --git a/mc_backend/mcserver/app/services/textService.py b/mc_backend/mcserver/app/services/textService.py
index f6034d5b46e82636223b92a11194823e916a3d51..842e8ec7abfc41707744ee120e2b5113c9068c06 100644
--- a/mc_backend/mcserver/app/services/textService.py
+++ b/mc_backend/mcserver/app/services/textService.py
@@ -82,7 +82,7 @@
         return TextService.is_match(target_lemma, vocabulary_set)
 
     @staticmethod
-    def check_lemma_suffix(target_lemma: str, vocabulary_set: Set[str]):
+    def check_lemma_suffix(target_lemma: str, vocabulary_set: Set[str]) -> bool:
         """ Checks whether slightly different forms of the lemma are matched by the vocabulary set. """
         for suffix in TextService.suffix_map:
             if target_lemma[-len(suffix):] == suffix:
diff --git a/mc_backend/mcserver/config.py b/mc_backend/mcserver/config.py
index c528a64768b3b23436377a7e8f574eaaf6978acc..9c198a6071c80d8dad725429ee2ebe3fe490e12a 100644
--- a/mc_backend/mcserver/config.py
+++ b/mc_backend/mcserver/config.py
@@ -19,8 +19,7 @@
     CURRENT_WORKING_DIRECTORY = os.getcwd()
     CURRENT_WORKING_DIRECTORY_PARENT = os.path.dirname(CURRENT_WORKING_DIRECTORY)
     CURRENT_WORKING_DIRECTORY_PARTS = os.path.split(CURRENT_WORKING_DIRECTORY)  # [::-1]
-    GRAPH_DATABASE_DIR = os.path.join(os.sep, "tmp", "graphannis-data")
-    CSM_DIRECTORY = os.path.join(CURRENT_WORKING_DIRECTORY, "csm")
+    GRAPH_DATABASE_DIR = os.path.join(os.sep, "tmp", "graphannis-data", str(os.getpid()))
     MC_SERVER_DIRECTORY = CURRENT_WORKING_DIRECTORY if \
         os.path.split(CURRENT_WORKING_DIRECTORY)[-1] == "mcserver" else os.path.join(CURRENT_WORKING_DIRECTORY,
                                                                                      "mcserver")
@@ -37,7 +36,6 @@
     TREEBANKS_PATH = os.path.join(ASSETS_DIRECTORY, "treebanks")
     TREEBANKS_PROIEL_PATH = os.path.join(TREEBANKS_PATH, "proiel")
 
-    API_SPEC_CSM_FILE_PATH = os.path.join(CSM_DIRECTORY, "csm_api.yaml")
     API_SPEC_MCSERVER_FILE_PATH = os.path.join(MC_SERVER_DIRECTORY, "mcserver_api.yaml")
     API_SPEC_MODELS_YAML_FILE_PATH = os.path.join(Path(MC_SERVER_DIRECTORY).parent, "openapi_models.yaml")
     AQL_CASE = "/.*Case=.*/"
@@ -73,7 +71,6 @@
     DATABASE_URL_FALLBACK = DATABASE_URL_DOCKER if IS_DOCKER else DATABASE_URL_SQLITE
     DATABASE_URL = os.environ.get("DATABASE_URL", DATABASE_URL_FALLBACK)
     DEBUG = False
-    DOCKER_SERVICE_NAME_CSM = "csm"
     DOCKER_SERVICE_NAME_MCSERVER = "mcserver"
     ERROR_MESSAGE_BAD_REQUEST = \
         "The server cannot or will not process the request due to something that is perceived to be a client " \
@@ -101,7 +98,6 @@
     H5P_DIRECTORY = "/home/mc/h5p" if IS_DOCKER else os.path.join(MC_FRONTEND_SRC_DIRECTORY, "assets", "h5p")
     # Windows: use 127.0.0.1 as host IP fallback
     HOST_IP_FALLBACK = "0.0.0.0"
-    HOST_IP_CSM = DOCKER_SERVICE_NAME_CSM if IS_DOCKER else HOST_IP_FALLBACK
     HOST_IP_MCSERVER = DOCKER_SERVICE_NAME_MCSERVER if IS_DOCKER else HOST_IP_FALLBACK
     HOST_PORT = 5000
     INTERNET_PROTOCOL = "http://"
@@ -112,7 +108,6 @@
     INTERVAL_STATIC_EXERCISES = 60 * 60 * 24
     IS_PRODUCTION = os.environ.get("FLASK_ENV_VARIABLE", "development") == "production"
     LEARNING_ANALYTICS_DIRECTORY = os.path.join(FILES_DIRECTORY, "learning_analytics")
-    LOG_PATH_CSM = f"{DOCKER_SERVICE_NAME_CSM}.log"
     LOG_PATH_MCSERVER = f"{DOCKER_SERVICE_NAME_MCSERVER}.log"
     MIGRATIONS_DIRECTORY = os.path.join(MC_SERVER_DIRECTORY, "migrations")
     NETWORK_GRAPH_TMP_PATH = os.path.join(TMP_DIRECTORY, "graph.svg")
@@ -126,10 +121,7 @@
     SECRET_KEY = 'this-really-needs-to-be-changed'
     # BEGIN endpoints
     # use these endpoints to access the REST API by appending them to the host name (e.g. "http://127.0.0.1:5000")
-    SERVER_URI_ANNIS_FIND = SERVER_URI_BASE + "find"
     SERVER_URI_CORPORA = SERVER_URI_BASE + "corpora"
-    SERVER_URI_CSM = "/"
-    SERVER_URI_CSM_SUBGRAPH = SERVER_URI_CSM + "subgraph"
     SERVER_URI_EXERCISE = SERVER_URI_BASE + "exercise"
     SERVER_URI_EXERCISE_LIST = SERVER_URI_BASE + "exerciseList"
     SERVER_URI_FAVICON = "/favicon.ico"
diff --git a/mc_backend/mcserver/mcserver_api.yaml b/mc_backend/mcserver/mcserver_api.yaml
index b0947bcff3a75706f464507058d90ff5b7cc2698..b979898011141bef2bb6fba88cb2be35b26aefa4 100644
--- a/mc_backend/mcserver/mcserver_api.yaml
+++ b/mc_backend/mcserver/mcserver_api.yaml
@@ -209,7 +209,7 @@
       summary: Returns results for a frequency query from ANNIS for a given CTS URN.
       operationId: mcserver.app.api.frequencyAPI.get
      responses:
-        "200":
+        200:
          description: Frequency analysis, i.e. a list of frequency items.
          content:
            application/json:
@@ -452,5 +452,3 @@ components:
   schemas:
     ExerciseAuthorExtension:
       $ref: '../openapi_models.yaml#/components/schemas/ExerciseAuthor'
-    TextComplexityFormExtension:
-      $ref: '../openapi_models.yaml#/components/schemas/TextComplexityForm'
diff --git a/mc_backend/mocks.py b/mc_backend/mocks.py
index 5f08be7c757f3cfaa012d8878185458e52eb2806..e4041f56f0afddcd6e6f074b53feb71904518e1c 100644
--- a/mc_backend/mocks.py
+++ b/mc_backend/mocks.py
@@ -20,9 +20,10 @@ from mcserver import Config, TestingConfig
 from mcserver.app import db, shutdown_session
 from mcserver.app.models import Phenomenon, PartOfSpeech, CitationLevel, ExerciseData, GraphData, \
     LinkMC, NodeMC, Language, Dependency, Case, AnnisResponse, Solution, TextPart, Citation, ExerciseMC, CorpusMC, \
-    SolutionElement, ReferenceableText, ExerciseType
+    SolutionElement, ReferenceableText, ExerciseType, make_solution_element_from_salt_id
 from mcserver.app.services import AnnotationService, CustomCorpusService, TextService, DatabaseService
 from mcserver.models_auto import Corpus, Exercise, UpdateInfo
+from openapi.openapi_server.models import TextComplexity
 
 
 class MockFilterBy:
@@ -76,6 +77,7 @@
     def __init__(self, app: Flask):
         self.app: Flask = app
         self.app_context: AppContext = self.app.app_context()
+        self.app_context.push()
         self.client: FlaskClient = self.app.test_client()
 
     @staticmethod
@@ -84,8 +86,6 @@
         if len(Mocks.app_dict) and list(Mocks.app_dict.keys())[0] != class_name:
             if Config.CORPUS_STORAGE_MANAGER:
                 Config.CORPUS_STORAGE_MANAGER.__exit__(None, None, None)
-            if os.path.exists(Config.GRAPH_DATABASE_DIR):
-                shutil.rmtree(Config.GRAPH_DATABASE_DIR)
             list(Mocks.app_dict.values())[0].app_context.pop()
             shutdown_session()
             db.drop_all()
@@ -118,7 +118,9 @@
                            "misc": {"SpaceAfter": "No"}},
                           {"id": 6, "form": ".", "lemma": ".", "upostag": "PUNCT", "xpostag": "Punc", "feats": None,
                            "head": 1, "deprel": "punct", "deps": None, "misc": None}],
-        metadata=OrderedDict([("sent_id", "1"), ("urn", "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1")]))]
+        metadata=OrderedDict(
+            [("sent_id", "1"), ("newpar", None),
+             ("urn", "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1")]))]
     annis_response_dict: dict = {"graph_data_raw": {"directed": True, "multigraph": True, "graph": {}, "nodes": [
         {"annis::node_name": "urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159692tok1",
          "annis::node_type": "node", "annis::type": "node", "annis::tok": "Pars", "udep::lemma": "pars",
@@ -707,7 +709,8 @@
                                    udep_lemma="udl"),
                             NodeMC(annis_node_name="ann", annis_node_type="ant", annis_tok="atk", annis_type="atp",
                                    id="doc1#sent1tok2", udep_upostag="udupt", udep_xpostag="udxpt", udep_feats="udf",
-                                   udep_lemma="udl")]), uri="/test", solutions=[])
+                                   udep_lemma="udl")]), uri="/test", solutions=[
+            Solution(target=make_solution_element_from_salt_id("doc1#sent1tok1"))])
     exercise_pdf: bytes = b'%PDF-1.4\n%\x93\x8c\x8b\x9e ReportLab Generated PDF document http://www.reportlab.com\n1 0 obj\n<<\n/F1 2 0 R\n>>\nendobj\n2 0 obj\n<<\n/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font\n>>\nendobj\n3 0 obj\n<<\n/BitsPerComponent 1 /ColorSpace /DeviceGray /Filter [ /ASCII85Decode ] /Height 23 /Length 223 /Subtype /Image \n /Type /XObject /Width 24\n>>\nstream\n\n 003B00 002700 002480 0E4940 114920 14B220 3CB650\n 75FE88 17FF8C 175F14 1C07E2 3803C4 703182 F8EDFC\n B2BBC2 BB6F84 31BFC2 18EA3C 0E3E00 07FC00 03F800\n 1E1800 1FF800>\n endstream\nendobj\n4 0 obj\n<<\n/Contents 8 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 7 0 R /Resources <<\n/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] /XObject <<\n/FormXob.c7485dcc8d256a6f197ed7802687f252 3 0 R\n>>\n>> /Rotate 0 /Trans <<\n\n>> \n /Type /Page\n>>\nendobj\n5 0 obj\n<<\n/PageMode /UseNone /Pages 7 0 R /Type /Catalog\n>>\nendobj\n6 0 obj\n<<\n/Author () /CreationDate'
     exercise_xml: str = '<quiz> <question type="matching"> <name> <text></text> </name> <questiontext format="html"> <text><![CDATA[<br><p></p><p></p><br><br>]]></text> </questiontext> <generalfeedback format="html"> <text></text> </generalfeedback> <defaultgrade>1.0000000</defaultgrade> <penalty>0.1000000</penalty> <hidden>0</hidden> <shuffleanswers>1</shuffleanswers> <correctfeedback format="html"> <text></text> </correctfeedback> <partiallycorrectfeedback format="html"> <text></text> </partiallycorrectfeedback> <incorrectfeedback format="html"> <text></text> </incorrectfeedback> <shownumcorrect/> <tags></tags> </question></quiz>'
     graph_data_raw_part: str = '{"directed":true,"multigraph":true,"graph":{},"nodes":[{"annis::node_name":"'
@@ -727,7 +730,7 @@
         '{"answers": [ { "correct": true, "text": "<div>provincia<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": true, "text": "<div>civis<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": true, "text": "<div>socius<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": true, "text": "<div>publicus<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": false, "text": "<div>adventus<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": false, "text": "<div>omnis<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": false, "text": "<div>autem<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } }, { "correct": false, "text": "<div>res<\/div>\\n", "tipsAndFeedback": { "tip": "", "chosenFeedback": "", "notChosenFeedback": "" } } ], "question": "<p>Choose words (4) that belong to the word field <b>government</b>:<\/p>\\n"}')
     h5p_json_voc_list: str = '{"questions":["<p><h4>atque </h4> *and : and*</p>"]}'
     headers_form_data: dict = {"Content-Type": "application/x-www-form-urlencoded"}
-    kwic_svg: bytes = b'"<svg height=\\"360\\" id=\\"svg1\\" width=\\"1252\\">'
+    kwic_svg: bytes = b'"<svg height=\\"280\\" id=\\"svg1\\" width=\\"1232\\">'
     nodes: 
List[dict] = [{"annis::node_name": "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1/doc1#sent1tok1", "annis::node_type": "node", "annis::type": "node", "annis::tok": "Caesar", "udep::upostag": "VERB", "udep::xpostag": "L3|modJ|tem3|gen4|stAV", @@ -762,7 +765,10 @@ class Mocks: static_exercises_udpipe_string: str = "1\tscribere\tscribere\n1\tcommovere\tcommovere\n1\tC\tC\n1\tgaudere\tgaudere\n1\tsignum\tsignum\n1\tvas\tvas\n1\tclarus\tclarus\n1\tcondicio\tcondicio\n1\tcom\tcum\n1\tprae\tprae\n1\tmovere\tmovere\n1\tducere\tducere\n1\tde\tde\n1\tcum\tcum\n1\tistam\tiste\n1\tnationum\tnatio\n1\tclarissimae\tclarus\n1\tmoderationem\tmoderatio\n1\tanimi\tanimus\n1\tomnium\tomnis\n1\tgentium\tgens\n1\tac\tac\n1\tvirtutem\tvirtus\n1\tprovinciae\tprovincia\n1\tCaesar\tCaesar\n1\test\tesse\n1\tsatis\tsatis\n1\tgovernment\tgovernment\n1\tsocius\tsocius\n1\tprovincia\tprovincia\n1\tpublicus\tpublicus\n1\tcivis\tcivis\n1\tatque\tatque" subgraph_json: str = '{"exercise_id":"","exercise_type":"","frequency_analysis":null,"graph_data":{"directed":true,"graph":{},"links":[],"multigraph":true,"nodes":[{"annis_node_name":"urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1/doc1#sent1tok3","annis_node_type":"node","annis_tok":"Galli","annis_type":"node","id":"salt:/urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1/doc1#sent1tok3","is_oov":null,"udep_lemma":"Gallo","udep_upostag":"VERB","udep_xpostag":"L3|modQ|tem1|stAC","udep_feats":"Tense=Pres|VerbForm=Inf|Voice=Pass","solution":null}]},"solutions":[],"text_complexity":null,"uri":""}' test_args: List[str] = ["tests.py", "-test"] - text_complexity_json_string: str = '{"all":54.53,"avg_w_len":5.79,"avg_w_per_sent":17.33,"lex_den":0.73,"n_abl_abs":0,"n_clause":1,"n_gerund":1,"n_inf":1,"n_part":1,"n_punct":3,"n_sent":3,"n_subclause":0,"n_types":48,"n_w":52,"pos":11}' + text_complexity: TextComplexity = TextComplexity.from_dict( + {"all": 54.53, "avg_w_len": 5.79, "avg_w_per_sent": 17.33, "lex_den": 0.73, "n_abl_abs": 0, + "n_clause": 1, "n_gerund": 1, "n_inf": 1, "n_part": 1, "n_punct": 3, "n_sent": 3, "n_subclause": 0, + "n_types": 48, "n_w": 52, "pos": 11}) text_list: List[ReferenceableText] = [ ReferenceableText(raw_text.split(".")[0], "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1"), ReferenceableText(raw_text.split(".")[1], "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.2")] diff --git a/mc_backend/openapi_models.yaml b/mc_backend/openapi_models.yaml index a5b2a4eaa8ea6065126c62f55724ad6e557260c9..a61e02983419592ffd3bb53a7288fd4e475bfe11 100644 --- a/mc_backend/openapi_models.yaml +++ b/mc_backend/openapi_models.yaml @@ -737,28 +737,6 @@ components: type: integer description: Number of distinct part of speech tags in the given corpus. example: 1 - TextComplexityForm: - type: object - x-body-name: complexity_data - description: Relevant parameters for measuring the text complexity of a text passage. - properties: - measure: - type: string - description: Label of the desired measure for text complexity. - example: all - urn: - type: string - description: CTS URN for the text passage from which the text complexity should be calculated. - example: urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1 - annis_response: - type: string - description: Serialized ANNIS response. - example: "{}" - required: - - measure - - urn - discriminator: - propertyName: measure UpdateInfo: description: Timestamps for updates of various resources. 
type: object diff --git a/mc_backend/run_csm.py b/mc_backend/run_csm.py deleted file mode 100644 index 6265a76d2dfb57c4338692157ff6f84e1fcbb4e4..0000000000000000000000000000000000000000 --- a/mc_backend/run_csm.py +++ /dev/null @@ -1,7 +0,0 @@ -from flask import Flask -from csm import get_app, get_cfg - -app: Flask = get_app() - -if __name__ == "__main__": - app.run(host=get_cfg().HOST_IP_CSM, port=get_cfg().CORPUS_STORAGE_MANAGER_PORT, use_reloader=False) diff --git a/mc_backend/tests.py b/mc_backend/tests.py index 7425f9421b6c23090b95ab4837099c8e17a28e92..eef4d6df2ebb46d4994f5bcb19536c9c4154dbd9 100644 --- a/mc_backend/tests.py +++ b/mc_backend/tests.py @@ -8,10 +8,9 @@ from threading import Thread from unittest.mock import patch, MagicMock, mock_open from zipfile import ZipFile +import flask import rapidjson as json -import re import shutil -import string import sys import time import unittest @@ -23,6 +22,7 @@ from typing import Dict, List, Tuple, Type, Any from conllu import TokenList from flask import Flask from gensim.models import Word2Vec +from graphannis.errors import NoSuchCorpus from lxml import etree from networkx import MultiDiGraph, Graph from requests import HTTPError @@ -30,23 +30,22 @@ from sqlalchemy.exc import OperationalError, InvalidRequestError from sqlalchemy.orm import session from werkzeug.wrappers import Response -import csm import mcserver -from csm import create_csm_app -from mcserver.app import create_app, db, start_updater, full_init -from mcserver.app.api.exerciseAPI import map_exercise_data_to_database +from mcserver.app import create_app, db, start_updater, full_init, log_exception +from mcserver.app.api.exerciseAPI import map_exercise_data_to_database, get_graph_data +from mcserver.app.api.kwicAPI import handle_exercise_data +from mcserver.app.api.vocabularyAPI import check_vocabulary from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseData, \ NodeMC, LinkMC, GraphData, Phenomenon, CustomCorpus, AnnisResponse, Solution, DownloadableFile, Language, \ - VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, TextComplexity, Dependency, \ - PartOfSpeech, Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence, \ - ReferenceableText + VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, Choice, XapiStatement, ExerciseMC, \ + CorpusMC, make_solution_element_from_salt_id, Sentence, ReferenceableText, Dependency, PartOfSpeech from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, \ - DatabaseService, XMLservice, TextService, FrequencyService, ExerciseService + DatabaseService, XMLservice, TextService, FrequencyService, ExerciseService, TextComplexityService from mcserver.config import TestingConfig, Config from mcserver.models_auto import Corpus, Exercise, UpdateInfo, LearningResult from mocks import Mocks, MockResponse, MockW2V, MockQuery, TestHelper -from openapi.openapi_server.models import VocabularyForm, VocabularyMC, TextComplexityForm, ExerciseForm, \ - KwicForm, VectorNetworkForm, MatchingExercise, ExerciseTypePath, H5PForm +from openapi.openapi_server.models import VocabularyForm, VocabularyMC, ExerciseForm, \ + KwicForm, VectorNetworkForm, MatchingExercise, ExerciseTypePath, H5PForm, TextComplexity class McTestCase(unittest.TestCase): @@ -61,13 +60,6 @@ class McTestCase(unittest.TestCase): if kwargs['params']['request'] == 'GetCapabilities': return MockResponse(Mocks.cts_capabilities_xml) return 
MockResponse(Mocks.cts_reff_xml) - elif url.endswith(Config.SERVER_URI_CSM_SUBGRAPH): - return MockResponse(json.dumps(Mocks.annis_response_dict)) - - def mocked_requests_post(*args, **kwargs): - url: str = args[0] - if url.endswith(Config.SERVER_URI_TEXT_COMPLEXITY): - return MockResponse(Mocks.text_complexity_json_string) def setUp(self): """Initializes the testing environment.""" @@ -202,38 +194,6 @@ class McTestCase(unittest.TestCase): db.session.query(Exercise).delete() session.make_transient(Mocks.exercise) - def test_api_exercise_post(self): - """ Creates a new exercise from scratch. """ - - def post_response(*args, **kwargs): - url: str = args[0] - if url.endswith("/"): - return MockResponse("}{") - elif url.endswith(str(Config.CORPUS_STORAGE_MANAGER_PORT)): - return MockResponse(json.dumps(Mocks.annis_response_dict)) - else: - return MockResponse(Mocks.text_complexity_json_string) - - db.session.query(UpdateInfo).delete() - ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name, - last_modified_time=1, created_time=1) - db.session.add(ui_exercises) - DatabaseService.commit() - ef: ExerciseForm = ExerciseForm(urn=Mocks.exercise.urn, type=ExerciseType.matching.value, - search_values=Mocks.exercise.search_values, instructions='abc') - with patch.object(mcserver.app.api.exerciseAPI.requests, "post", side_effect=post_response): - response: Response = Mocks.app_dict[self.class_name].client.post( - Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict()) - ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True))) - self.assertEqual(len(ar.solutions), 3) - Config.CORPUS_STORAGE_MANAGER_PORT = f"{Config.CORPUS_STORAGE_MANAGER_PORT}/" - response: Response = Mocks.app_dict[self.class_name].client.post( - Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict()) - self.assertEqual(response.status_code, 500) - Config.CORPUS_STORAGE_MANAGER_PORT = int(Config.CORPUS_STORAGE_MANAGER_PORT[:-1]) - Mocks.app_dict[self.class_name].app_context.push() - db.session.query(UpdateInfo).delete() - def test_api_exercise_list_get(self): """ Retrieves a list of available exercises. """ ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name, @@ -262,6 +222,29 @@ class McTestCase(unittest.TestCase): db.session.query(UpdateInfo).delete() session.make_transient(Mocks.exercise) + def test_api_exercise_post(self): + """ Creates a new exercise from scratch. 
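+ Unlike the old CSM round-trip, the graph lookup and the complexity calculation are
+ patched in-process below: the first side effect of get_graph_data yields the mocked
+ ANNIS response, while the second (a ValueError) is expected to surface as a 500.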
""" + db.session.query(UpdateInfo).delete() + ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name, + last_modified_time=1, created_time=1) + db.session.add(ui_exercises) + DatabaseService.commit() + ef: ExerciseForm = ExerciseForm(urn=Mocks.exercise.urn, type=ExerciseType.matching.value, + search_values=Mocks.exercise.search_values, instructions='abc') + with patch.object(mcserver.app.api.exerciseAPI, "get_graph_data") as mock_ggd: + mock_ggd.side_effect = [Mocks.annis_response_dict, ValueError()] + with patch.object(mcserver.app.services.textComplexityService.TextComplexityService, + "text_complexity", return_value=Mocks.text_complexity): + response: Response = Mocks.app_dict[self.class_name].client.post( + Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict()) + ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True))) + self.assertEqual(len(ar.solutions), 3) + response = Mocks.app_dict[self.class_name].client.post( + Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict()) + self.assertEqual(response.status_code, 500) + Mocks.app_dict[self.class_name].app_context.push() + db.session.query(UpdateInfo).delete() + def test_api_file_get(self): """Gets an existing exercise""" ui_file: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.file_api_clean.name, @@ -311,13 +294,23 @@ class McTestCase(unittest.TestCase): def test_api_frequency_get(self): """ Requests a frequency analysis for a given URN. """ - with patch.object(mcserver.app.services.corpusService.requests, "get", return_value=MockResponse( - json.dumps([FrequencyItem(values=[], phenomena=[], count=0).to_dict()]))): - response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_FREQUENCY, - query_string=dict(urn=Mocks.urn_custom)) + expected_fa: List[FrequencyItem] = [ + FrequencyItem(values=[Dependency.object.name], phenomena=[Phenomenon.DEPENDENCY], count=1), + FrequencyItem(values=[PartOfSpeech.adjective.name], phenomena=[Phenomenon.UPOSTAG], count=1)] + with patch.object( + mcserver.app.services.corpusService.CorpusService, "get_frequency_analysis", + side_effect=[[FrequencyItem(values=[], phenomena=[], count=0)], expected_fa]): + response: Response = Mocks.app_dict[self.class_name].client.get( + TestingConfig.SERVER_URI_FREQUENCY, query_string=dict(urn=Mocks.urn_custom)) result_list: List[dict] = json.loads(response.get_data(as_text=True)) fa: List[FrequencyItem] = [FrequencyItem.from_dict(x) for x in result_list] self.assertEqual(len(fa), 1) + response = Mocks.app_dict[self.class_name].client.get( + TestingConfig.SERVER_URI_FREQUENCY, query_string=dict(urn=Mocks.urn_custom)) + result_list: List[dict] = json.loads(response.get_data(as_text=True)) + fa: List[FrequencyItem] = [FrequencyItem.from_dict(x) for x in result_list] + self.assertEqual(fa[0].values, expected_fa[0].values) + self.assertEqual(fa[1].values[0], None) def test_api_h5p_get(self): """ Requests a H5P JSON file for a given exercise. """ @@ -389,26 +382,27 @@ class McTestCase(unittest.TestCase): response: Response = Mocks.app_dict[self.class_name].client.get("/") self.assertEqual(response.status_code, 404) - @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post) - def test_api_raw_text_get(self, mock_post_tcs: MagicMock): + def test_api_raw_text_get(self): """ Retrieves the raw text for a given URN. 
""" - with patch.object(mcserver.app.services.corpusService.requests, "get") as mock_get_cs: - mock_get_cs.return_value = MockResponse( - json.dumps(AnnisResponse(graph_data=GraphData(links=[], nodes=[]), solutions=[]).to_dict())) - response: Response = Mocks.app_dict[self.class_name].client.get( - TestingConfig.SERVER_URI_RAW_TEXT, query_string=dict(urn=Mocks.urn_custom)) - self.assertEqual(response.status_code, 404) - mock_get_cs.return_value = MockResponse(json.dumps(Mocks.annis_response.to_dict())) - response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT, - query_string=dict(urn=Mocks.urn_custom)) - ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True))) - self.assertEqual(len(ar.graph_data.nodes), 52) - ar_copy: AnnisResponse = AnnisResponse.from_dict(Mocks.annis_response.to_dict()) - ar_copy.graph_data.nodes = [] - mock_get_cs.return_value = MockResponse(json.dumps(ar_copy.to_dict())) - response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT, - query_string=dict(urn=Mocks.urn_custom)) - self.assertEqual(response.status_code, 404) + with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus") as mock_get_corpus: + with patch.object(mcserver.app.services.textComplexityService.TextComplexityService, + "text_complexity", return_value=Mocks.text_complexity): + mock_get_corpus.return_value = AnnisResponse( + graph_data=GraphData(links=[], nodes=[]), solutions=[]) + response: Response = Mocks.app_dict[self.class_name].client.get( + TestingConfig.SERVER_URI_RAW_TEXT, query_string=dict(urn=Mocks.urn_custom)) + self.assertEqual(response.status_code, 404) + mock_get_corpus.return_value = Mocks.annis_response + response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT, + query_string=dict(urn=Mocks.urn_custom)) + ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True))) + self.assertEqual(len(ar.graph_data.nodes), 52) + ar_copy: AnnisResponse = AnnisResponse.from_dict(Mocks.annis_response.to_dict()) + ar_copy.graph_data.nodes = [] + mock_get_corpus.return_value = ar_copy + response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT, + query_string=dict(urn=Mocks.urn_custom)) + self.assertEqual(response.status_code, 404) def test_api_static_exercises_get(self): """ Retrieves static exercises from the frontend and publishes deep URLs for each one of them. """ @@ -449,25 +443,18 @@ class McTestCase(unittest.TestCase): response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_STATIC_EXERCISES) self.assertEqual(mock_udpipe.call_count, 1) - @patch('mcserver.app.services.corpusService.requests.get', side_effect=mocked_requests_get) - def test_api_subgraph_get(self, mock_get: MagicMock): - """ Retrieves subgraph data for a given URN. """ - ar: AnnisResponse = CorpusService.get_subgraph(Mocks.urn_custom, 'tok="quarum"', 0, 0, False) - self.assertEqual(len(ar.solutions), 3) - - @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post) - def test_api_text_complexity_get(self, mock_post: MagicMock): + def test_api_text_complexity_get(self): """ Calculates text complexity measures for a given URN. 
""" - with patch.object(mcserver.app.services.corpusService.requests, "get", - return_value=MockResponse(json.dumps(Mocks.graph_data.to_dict()))): - args: dict = dict(urn=Mocks.urn_custom, measure=TextComplexityMeasure.all.name) - response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_TEXT_COMPLEXITY, - query_string=args) - self.assertEqual(response.get_data(as_text=True), Mocks.text_complexity_json_string) - args["measure"] = "n_w" - response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_TEXT_COMPLEXITY, - query_string=args) - self.assertEqual(json.loads(response.get_data(as_text=True))["n_w"], 52) + tc: TextComplexity = TextComplexityService.text_complexity("n_w", Mocks.urn_custom, Mocks.graph_data) + self.assertEqual(tc.n_w, 52) + with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus", + return_value=Mocks.annis_response): + with patch.object(mcserver.app.services.textComplexityService.TextComplexityService, + "text_complexity", return_value=Mocks.text_complexity): + args: dict = dict(urn=Mocks.urn_custom, measure=TextComplexityMeasure.all.name) + response: Response = Mocks.app_dict[self.class_name].client.get( + TestingConfig.SERVER_URI_TEXT_COMPLEXITY, query_string=args) + self.assertEqual(response.get_data(as_text=True), json.dumps(Mocks.text_complexity.to_dict())) @patch('MyCapytain.retrievers.cts5.requests.get', side_effect=mocked_requests_get) def test_api_valid_reff_get(self, mock_get: MagicMock): # @@ -520,13 +507,12 @@ class McTestCase(unittest.TestCase): response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_VOCABULARY, query_string=args) sentences: List[Sentence] = [Sentence.from_dict(x) for x in json.loads(response.get_data(as_text=True))] - self.assertEqual(sentences[0].matching_degree, 90.9090909090909) + self.assertEqual(sentences[0].matching_degree, 92.85714285714286) - @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post) - def test_api_vocabulary_post(self, mock_post: MagicMock): + def test_api_vocabulary_post(self): """ Indicates for each token of a corpus whether it is covered by a reference vocabulary. 
""" - with patch.object(mcserver.app.services.corpusService.requests, "get", - return_value=MockResponse(json.dumps(Mocks.annis_response.to_dict()))): + with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus", + return_value=Mocks.annis_response): vf: VocabularyForm = VocabularyForm(frequency_upper_bound=500, query_urn=Mocks.urn_custom, vocabulary=VocabularyMC.AGLDT) response: Response = Mocks.app_dict[self.class_name].client.post( @@ -535,19 +521,16 @@ class McTestCase(unittest.TestCase): self.assertTrue(NodeMC.from_dict(ar.graph_data.nodes[3].to_dict()).is_oov) def test_app_init(self): - """Creates a CSM app in testing mode.""" + """Creates an MCserver app in testing mode.""" CorpusService.init_graphannis_logging() log_path: str = os.path.join(os.getcwd(), Config.GRAPHANNIS_LOG_PATH) self.assertTrue(os.path.exists(log_path)) os.remove(log_path) with patch.object(sys, 'argv', Mocks.test_args): - app: Flask = csm.get_app() + app: Flask = mcserver.get_app() self.assertIsInstance(app, Flask) self.assertTrue(app.config["TESTING"]) db.session.query(UpdateInfo).delete() - app = mcserver.get_app() - self.assertIsInstance(app, Flask) - self.assertTrue(app.config["TESTING"]) Mocks.app_dict[self.class_name].app_context.push() db.session.query(Corpus).delete() @@ -584,26 +567,7 @@ class McTestCase(unittest.TestCase): data_received: bytes = response.get_data() self.assertEqual(content, data_received) - def test_init_corpus_storage_manager(self): - """ Initializes the corpus storage manager. """ - ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, - last_modified_time=datetime.utcnow().timestamp(), created_time=1) - db.session.add(ui_cts) - DatabaseService.commit() - csm_process: Process - with patch.object(sys, 'argv', Mocks.test_args): - os.environ[Config.COVERAGE_ENVIRONMENT_VARIABLE] = Config.COVERAGE_CONFIGURATION_FILE_NAME - csm_process = Process(target=csm.run_app) - csm_process.start() - Mocks.app_dict[self.class_name].app_context.push() - self.assertTrue(csm_process.is_alive()) - csm_process.terminate() - csm_process.join() - self.assertFalse(csm_process.is_alive()) - db.session.query(UpdateInfo).delete() - - @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post) - def test_map_exercise_data_to_database(self, mock_post: MagicMock): + def test_map_exercise_data_to_database(self): """Maps exercise data to the database and saves it for later access.""" ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name, last_modified_time=1, created_time=1) @@ -612,8 +576,8 @@ class McTestCase(unittest.TestCase): exercise_expected: Exercise = Mocks.exercise exercise: Exercise = map_exercise_data_to_database( solutions=[Solution.from_dict(x) for x in json.loads(exercise_expected.solutions)], - exercise_data=Mocks.exercise_data, instructions=exercise_expected.instructions, - exercise_type=exercise_expected.exercise_type, + exercise_data=ExerciseData(json_dict=Mocks.exercise_data.serialize()), + instructions=exercise_expected.instructions, exercise_type=exercise_expected.exercise_type, exercise_type_translation=exercise_expected.exercise_type_translation, xml_guid=exercise_expected.eid, conll=exercise_expected.conll, correct_feedback=exercise_expected.correct_feedback, partially_correct_feedback=exercise_expected.partially_correct_feedback, urn=Mocks.urn_custom, @@ -655,99 +619,17 @@ class McTestCase(unittest.TestCase): db.session.query(UpdateInfo).delete() -class 
CsmTestCase(unittest.TestCase):
- """The test suite for the Corpus Storage Manager application."""
-
+class CorpusTestCase(unittest.TestCase):
+ """The test suite for corpus and annotation handling in the main mcserver application."""
def setUp(self):
"""Initializes the testing environment."""
self.start_time = time.time()
self.class_name: str = str(self.__class__)
- TestHelper.update_flask_app(self.class_name, create_csm_app)
+ TestHelper.update_flask_app(self.class_name, create_app)
def tearDown(self):
"""Finishes testing by removing the traces."""
print("{0}: {1} seconds".format(self.id(), "%.2f" % (time.time() - self.start_time)))
- def test_api_annis_find(self):
- """Retrieves search results from ANNIS for a given corpus and AQL query."""
- disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
- AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=Mocks.annotations,
- cs=Config.CORPUS_STORAGE_MANAGER, file_name=disk_urn)
- response: Response = Mocks.app_dict[self.class_name].client.get(
- Config.SERVER_URI_ANNIS_FIND, query_string=dict(urn=Mocks.urn_custom, aql="tok"))
- matches: List[str] = json.loads(response.get_data())
- self.assertEqual(len(matches), 6)
- solutions: List[Solution] = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok'],
- [Phenomenon.DEPENDENCY])
- self.assertEqual(len(solutions), 5)
- solutions = CorpusService.get_matches(Mocks.urn_custom, ['upostag="VERB" ->dep tok'],
- [Phenomenon.UPOSTAG, Phenomenon.DEPENDENCY])
- self.assertEqual(len(solutions), 5)
- solutions = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok ->dep tok'],
- [Phenomenon.DEPENDENCY, Phenomenon.UPOSTAG])
- self.assertEqual(len(solutions), 3)
-
- def test_api_csm_get(self):
- """Gets the raw text for a specific URN."""
- ret_vals: List[AnnisResponse] = [
- AnnisResponse(graph_data=GraphData(links=[], nodes=[])), Mocks.annis_response]
- with patch.object(CorpusService, "get_corpus", side_effect=ret_vals):
- response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_CSM,
- query_string=dict(urn=Mocks.urn[:5]))
- self.assertEqual(response.status_code, 404)
- response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_CSM,
- query_string=dict(urn=Mocks.urn_custom))
- ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True)))
- text_raw = " ".join(x.annis_tok for x in ar.graph_data.nodes)
- # remove the spaces before punctuation because, otherwise, the parser won't work correctly
- received_text: str = re.sub('[ ]([{0}])'.format(string.punctuation), r'\1', text_raw)
- expected_text: str = "Pars est prima prudentiae ipsam cui praecepturus es aestimare personam."
- self.assertIn(expected_text, received_text)
-
- def test_api_frequency_get(self):
- """ Requests a frequency analysis for a given URN.
""" - expected_fa: List[FrequencyItem] = [ - FrequencyItem(values=[Dependency.object.name], phenomena=[Phenomenon.DEPENDENCY], count=1), - FrequencyItem(values=[PartOfSpeech.adjective.name], phenomena=[Phenomenon.UPOSTAG], count=1)] - with patch.object(CorpusService, "get_frequency_analysis", return_value=expected_fa): - response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_FREQUENCY, - query_string=dict(urn=Mocks.urn_custom)) - result_list: List[dict] = json.loads(response.get_data(as_text=True)) - fa: List[FrequencyItem] = [FrequencyItem.from_dict(x) for x in result_list] - self.assertEqual(fa[0].values, expected_fa[0].values) - self.assertEqual(fa[1].values[0], None) - - def test_api_subgraph_get(self): - """ Retrieves subgraph data for a given URN. """ - args: dict = dict(urn=Mocks.urn_custom, aqls=['tok="Galli"'], ctx_left="0", ctx_right="0") - response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_CSM_SUBGRAPH, - query_string=args) - self.assertEqual(response.get_data(as_text=True), Mocks.subgraph_json) - - def test_api_subgraph_post(self): - """ Retrieves KWIC-style subgraph data for a given URN. """ - args: dict = dict(urn=Mocks.urn_custom, aqls=['tok="Galli"'], ctx_left="5", ctx_right="5") - response: Response = Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_CSM_SUBGRAPH, - data=json.dumps(args)) - results_list: list = json.loads(response.data.decode("utf-8")) - exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=x) for x in results_list] - self.assertEqual(len(exercise_data_list[0].graph.nodes), 6) - with self.assertRaises(NotImplementedError): - AnnotationService.get_single_subgraph("", []) - - def test_api_text_complexity_post(self): - """ Calculates text complexity measures for a given URN. 
""" - tcf: TextComplexityForm = TextComplexityForm(urn=Mocks.urn_custom, measure=TextComplexityMeasure.all.name) - response: Response = Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_TEXT_COMPLEXITY, - data=tcf.to_dict()) - tc: TextComplexity = TextComplexity.from_dict(json.loads(response.get_data(as_text=True))) - self.assertEqual(tc.pos, 5) - tcf.measure = "n_w" - response = Mocks.app_dict[self.class_name].client.post( - TestingConfig.SERVER_URI_TEXT_COMPLEXITY, data=tcf.to_dict()) - tc = TextComplexity.from_dict(json.loads(response.get_data(as_text=True))) - self.assertEqual(tc.n_w, 6) - @patch('mcserver.app.services.corpusService.CorpusService.update_corpora') def test_check_corpus_list_age(self, mock_update: MagicMock): """Checks whether the list of available corpora needs to be updated.""" @@ -763,115 +645,71 @@ class CsmTestCase(unittest.TestCase): self.assertGreater(ui_cts.last_modified_time, utc_now.timestamp()) db.session.query(UpdateInfo).delete() - def test_corpus_storage_manager(self): - """Performs an end-to-end test for the Corpus Store Manager.""" - Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_CSM, - query_string=dict(urn=Mocks.urn_custom)) - data_dict: dict = dict(title=Mocks.exercise.urn, annotations=Mocks.exercise.conll, aqls=Mocks.aqls, - exercise_type=ExerciseType.cloze.name, search_phenomena=[Phenomenon.UPOSTAG]) - first_response: Response = Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_CSM, - data=json.dumps(data_dict)) - # ANNIS does not create deterministically reproducible results, so we only test for a substring - self.assertIn(Mocks.graph_data_raw_part, first_response.get_data(as_text=True)) - third_response: Response = Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_CSM, - data=data_dict) - # Response: Bad Request - self.assertEqual(third_response.status_code, 400) - - def test_find_matches(self): - """ Finds matches for a given URN and AQL and returns the corresponding node IDs. """ - matches: List[str] = CorpusService.find_matches(Mocks.urn_custom[:-6] + "3.1.1", "tok", True) - self.assertEqual(len(matches), 56) - expected_matches: List[str] = ["a", "b"] - with patch.object(mcserver.app.services.corpusService.requests, "get", - return_value=MockResponse(json.dumps(expected_matches))): - matches: List[str] = CorpusService.find_matches(Mocks.urn, "") - self.assertEqual(matches, expected_matches) - - def test_full_init(self): - """ Fully initializes the application, including logging.""" - Mocks.app_dict[self.class_name].app.config["TESTING"] = False - with patch.object(CorpusService, "init_graphannis_logging"): - with patch.object(mcserver.app, "start_updater") as updater_mock: - full_init(Mocks.app_dict[self.class_name].app) - self.assertEqual(updater_mock.call_count, 1) - Mocks.app_dict[self.class_name].app.config["TESTING"] = True - db.session.query(UpdateInfo).delete() + def test_extract_custom_corpus_text(self): + """ Extracts text from the relevant parts of a (custom) corpus. 
""" + new_text_parts: List[ReferenceableText] = CustomCorpusService.extract_custom_corpus_text( + Mocks.text_parts, ["", ""], ["", "0"], "", 1, [False, True]) + self.assertEqual(len(new_text_parts), 0) + new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["", ""], ["", "0"], "", 1) + self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) + new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["1"], ["3"], "") + self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) - def test_get_annotations_from_string(self): - """ Gets annotation data from a given string, be it a CoNLL string or a corpus URN. """ - conll: List[TokenList] - with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string): - with patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list): - with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text): - conll = CorpusService.get_annotations_from_string(Mocks.urn) - self.assertEqual(len(conll[0]), 5) - mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn) - self.assertEqual(len(mdg.nodes), 5) - mdg = CorpusService.get_graph(f"{Mocks.urn}@1-1") - self.assertEqual(len(mdg.nodes), 5) - with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations): - conll = CorpusService.get_annotations_from_string(Mocks.urn_custom) - self.assertEqual(len(conll[0]), 6) - with patch.object(CustomCorpusService, "get_custom_corpus_annotations", return_value=Mocks.annotations * 2): - urn: str = f"{Config.CUSTOM_CORPUS_VIVA_URN}:1.1-1.1" - conll = CorpusService.get_annotations_from_string(urn) - self.assertEqual(len(conll), 2) + def test_get_corpus(self): + """ Loads the text for a standard corpus from the CTS API or cache. """ + ar: AnnisResponse = CorpusService.get_corpus("") + self.assertEqual(len(ar.graph_data.nodes), 0) - def test_get_frequency_analysis(self): - """ Gets a frequency analysis by calling the CSM. """ - with patch.object(mcserver.app.services.corpusService.requests, "get", return_value=MockResponse( - json.dumps([FrequencyItem(values=[], phenomena=[], count=0).to_dict()]))): - fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn=Mocks.urn_custom, is_csm=False) - self.assertEqual(len(fa), 1) - CorpusService.get_corpus(Mocks.urn_custom, True) - with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response): - fa = CorpusService.get_frequency_analysis(Mocks.urn_custom, True) - self.assertEqual(len(fa), 163) + def test_get_custom_corpus_annotations(self): + """ Retrieves the annotated text for a custom non-PROIEL corpus, e.g. a textbook. """ + mock_conll: List[TokenList] = Mocks.annotations + [TokenList([], metadata=OrderedDict([("sent_id", "3")]))] + with patch.object(CustomCorpusService, "get_custom_corpus_text", return_value=Mocks.text_list): + with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string): + with patch.object(AnnotationService, "parse_conll_string", return_value=mock_conll): + conll: List[TokenList] = CustomCorpusService.get_custom_corpus_annotations(Mocks.urn + "@1-2") + self.assertEqual(len(conll), 1) - def test_get_graph(self): - """ Retrieves a graph from the cache or, if not there, builds it from scratch. 
""" - expected_mdg: MultiDiGraph = MultiDiGraph([(1, 2), (2, 3), (3, 4)]) - with patch.object(Config.CORPUS_STORAGE_MANAGER, "subcorpus_graph", return_value=expected_mdg): - mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn) - self.assertEqual(mdg, expected_mdg) + def test_get_custom_corpus_reff(self): + """ Retrieves possible citations for given URN. """ + CustomCorpusService.custom_corpora[4].text_parts = Mocks.text_parts + reff: List[str] = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-15]) + self.assertEqual(len(reff), 0) + McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) + reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-14]) + self.assertEqual(len(reff), 1) + McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) + reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9]) + self.assertEqual(len(reff), 2) + reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9]) + self.assertEqual(len(reff), 2) + McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) + CustomCorpusService.custom_corpora[4].text_parts = [] + with patch.object(CustomCorpusService, "init_custom_corpus", + return_value=CustomCorpusService.custom_corpora[4]): + source_urn: str = CustomCorpusService.custom_corpora[4].corpus.source_urn + reff = CustomCorpusService.get_custom_corpus_reff(source_urn) + self.assertEqual(len(reff), 0) + McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) - def test_init_updater(self): - """Initializes the corpus list updater.""" - with patch.object(CorpusService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")): - ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, - last_modified_time=1, created_time=1) - db.session.add(ui_cts) - DatabaseService.commit() - with patch.object(CorpusService, 'update_corpora') as update_mock: - t: Thread = start_updater(Mocks.app_dict[self.class_name].app) - self.assertIsInstance(t, Thread) - self.assertTrue(t.is_alive()) - time.sleep(0.1) - db.session.query(UpdateInfo).delete() - assert not update_mock.called + def test_get_custom_corpus_text(self): + """ Retrieves the text for a custom corpus, e.g. a textbook. """ + text_list: List[ReferenceableText] = CustomCorpusService.get_custom_corpus_text(Mocks.urn) + self.assertEqual(len(text_list), 0) - def test_map_conll_to_graph(self): - """ Saves an annotated corpus in CONLL format to the ANNIS corpus storage. """ - conll: List[TokenList] = Mocks.annotations + copy.deepcopy(Mocks.annotations) - conll[1].metadata = dict(sent_id="2") - disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom) - AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=conll, - cs=Config.CORPUS_STORAGE_MANAGER, file_name=disk_urn) - result: dict = CorpusService.process_corpus_data( - urn=Mocks.urn_custom, annotations=conll, aqls=["tok"], exercise_type=ExerciseType.cloze, - search_phenomena=[Phenomenon.UPOSTAG]) - gd: GraphData = AnnotationService.map_graph_data(result["graph_data_raw"]) - self.assertEqual(gd.nodes[-1].id.split("/")[0], gd.nodes[0].id.split("/")[0]) + def test_init_custom_corpus(self): + """Adds custom corpora to the corpus list, e.g. 
the PROIEL corpora.""" + with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations): + cc: CustomCorpus = CustomCorpusService.init_custom_corpus(CustomCorpusService.custom_corpora[0]) + self.assertEqual(len(cc.text_parts), 1) def test_process_corpus_data(self): """Builds a graph from annotated text data.""" disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom) AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=Mocks.annotations, - cs=Config.CORPUS_STORAGE_MANAGER, file_name=disk_urn) - result: dict = CorpusService.process_corpus_data(urn=Mocks.urn_custom, annotations=Mocks.annotations, - aqls=["upostag"], exercise_type=ExerciseType.cloze, - search_phenomena=[Phenomenon.UPOSTAG]) + file_name=disk_urn) + result: dict = CorpusService.process_corpus_data( + urn=Mocks.urn_custom, annotations=Mocks.annotations, aqls=[Phenomenon.UPOSTAG], + exercise_type=ExerciseType.cloze, search_phenomena=[Phenomenon.UPOSTAG]) gd: GraphData = AnnotationService.map_graph_data(result["graph_data_raw"]) self.assertEqual(len(gd.nodes), len(Mocks.nodes)) urn_parts: List[str] = Mocks.urn_custom.split(":") @@ -882,20 +720,13 @@ class CsmTestCase(unittest.TestCase): text_parts_list: List[ReferenceableText] = CorpusService.load_text_list(Mocks.urn_custom) self.assertEqual(len(text_parts_list), 1) - def test_run_app(self): - """ Creates a new app and runs it. """ - with patch.object(csm, "get_app") as mock_get_app: - csm.run_app() - self.assertEqual(mock_get_app.call_count, 1) - Mocks.app_dict[self.class_name].app_context.push() - class CommonTestCase(unittest.TestCase): def setUp(self): """Initializes the testing environment.""" self.start_time = time.time() self.class_name: str = str(self.__class__) - TestHelper.update_flask_app(self.class_name, create_csm_app) + TestHelper.update_flask_app(self.class_name, create_app) def tearDown(self): """Finishes testing by removing the traces.""" @@ -903,11 +734,11 @@ class CommonTestCase(unittest.TestCase): def test_add_dependency_frequencies(self): """ Performs a frequency analysis for dependency annotations in a corpus. """ - gd: GraphData = GraphData.from_dict(Mocks.graph_data.to_dict()) - gd.links[0].udep_deprel = "safebpfw" - gd.links[48].udep_deprel = "fkonürwür" + gd_copy: GraphData = GraphData.from_dict(Mocks.graph_data.to_dict()) + gd_copy.links[0].udep_deprel = "safebpfw" + gd_copy.links[48].udep_deprel = "fkonürwür" fis: List[FrequencyItem] = [] - FrequencyService.add_dependency_frequencies(gd, fis) + FrequencyService.add_dependency_frequencies(gd_copy, fis) self.assertEqual(len(fis), 134) def test_add_edges(self): @@ -936,6 +767,15 @@ class CommonTestCase(unittest.TestCase): self.assertEqual(conll[1].metadata["urn"], "") self.assertEqual(conll[2].metadata["urn"], Mocks.urn_custom) + def test_check_vocabulary(self): + """ Checks whether the lemmata of a given graph/text match a reference vocabulary. """ + gd_copy: GraphData = GraphData.from_dict(Mocks.graph_data.to_dict()) + new_node: NodeMC = NodeMC.from_dict(gd_copy.nodes[0].to_dict()) + new_node.id = "salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159693tok1" + gd_copy.nodes.append(NodeMC) + sentences: List[Sentence] = check_vocabulary(Mocks.graph_data, {""}) + self.assertEqual(sentences[1].matching_degree, 3.225806451612903) + def test_create_xml_string(self): """Exports the exercise data to the Moodle XML format. 
See https://docs.moodle.org/35/en/Moodle_XML_format .""" xml_string: str = XMLservice.create_xml_string( @@ -955,15 +795,47 @@ class CommonTestCase(unittest.TestCase): are_dependencies_missing = True self.assertFalse(are_dependencies_missing) - def test_extract_custom_corpus_text(self): - """ Extracts text from the relevant parts of a (custom) corpus. """ - new_text_parts: List[ReferenceableText] = CustomCorpusService.extract_custom_corpus_text( - Mocks.text_parts, ["", ""], ["", "0"], "", 1, [False, True]) - self.assertEqual(len(new_text_parts), 0) - new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["", ""], ["", "0"], "", 1) - self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) - new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["1"], ["3"], "") - self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) + def test_find_matches(self): + """ Finds matches for a given URN and AQL and returns the corresponding node IDs. """ + expected_matches: List[str] = ["a", "b"] + with patch.object(Config.CORPUS_STORAGE_MANAGER, "find", + side_effect=[[expected_matches], NoSuchCorpus(""), [expected_matches]]): + matches: List[str] = CorpusService.find_matches(Mocks.urn_custom[:-6] + "3.1.1", "tok") + self.assertEqual(matches, expected_matches) + with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus"): + matches = CorpusService.find_matches(Mocks.urn, "") + self.assertEqual(matches, expected_matches) + + def test_full_init(self): + """ Fully initializes the application, including logging.""" + Mocks.app_dict[self.class_name].app.config["TESTING"] = False + with patch.object(CorpusService, "init_graphannis_logging"): + with patch.object(mcserver.app, "start_updater") as updater_mock: + full_init(Mocks.app_dict[self.class_name].app) + self.assertEqual(updater_mock.call_count, 1) + Mocks.app_dict[self.class_name].app.config["TESTING"] = True + db.session.query(UpdateInfo).delete() + + def test_get_annotations_from_string(self): + """ Gets annotation data from a given string, be it a CoNLL string or a corpus URN. 
""" + conll: List[TokenList] + with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string): + with patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list): + with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text): + conll = CorpusService.get_annotations_from_string(Mocks.urn) + self.assertEqual(len(conll[0]), 5) + mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn) + self.assertEqual(len(mdg.nodes), 5) + mdg = CorpusService.get_graph(f"{Mocks.urn}@1-1") + self.assertEqual(len(mdg.nodes), 5) + with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations): + conll = CorpusService.get_annotations_from_string(Mocks.urn_custom) + self.assertEqual(len(conll[0]), 6) + with patch.object(CustomCorpusService, "get_custom_corpus_annotations", + return_value=Mocks.annotations * 2): + urn: str = f"{Config.CUSTOM_CORPUS_VIVA_URN}:1.1-1.1" + conll = CorpusService.get_annotations_from_string(urn) + self.assertEqual(len(conll), 2) def test_get_concept_network(self): """Extracts a network of words from vector data in an AI model.""" @@ -973,39 +845,48 @@ class CommonTestCase(unittest.TestCase): svg_string: str = get_concept_network("ueritas", highlight_regex_string="uera") self.assertGreater(len(svg_string), 6500) - def test_get_corpus(self): - """ Loads the text for a standard corpus from the CTS API or cache. """ - ar: AnnisResponse = CorpusService.get_corpus("", True) - self.assertEqual(len(ar.graph_data.nodes), 0) + def test_get_frequency_analysis(self): + """ Collects frequency statistics for various combinations of linguistic annotations in a corpus. """ + with patch.object( + mcserver.app.services.corpusService.CorpusService, "get_frequency_analysis", + return_value=[FrequencyItem(values=[], phenomena=[], count=0)]): + fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn=Mocks.urn_custom) + self.assertEqual(len(fa), 1) + CorpusService.get_corpus(Mocks.urn_custom) + with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response): + fa = CorpusService.get_frequency_analysis(Mocks.urn_custom) + self.assertEqual(len(fa), 163) - def test_get_custom_corpus_annotations(self): - """ Retrieves the annotated text for a custom non-PROIEL corpus, e.g. a textbook. """ - mock_conll: List[TokenList] = Mocks.annotations + [TokenList([], metadata=OrderedDict([("sent_id", "3")]))] - with patch.object(CustomCorpusService, "get_custom_corpus_text", return_value=Mocks.text_list): - with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string): - with patch.object(AnnotationService, "parse_conll_string", return_value=mock_conll): - conll: List[TokenList] = CustomCorpusService.get_custom_corpus_annotations(Mocks.urn + "@1-2") - self.assertEqual(len(conll), 1) + def test_get_graph(self): + """ Retrieves a graph from the cache or, if not there, builds it from scratch. """ + expected_mdg: MultiDiGraph = MultiDiGraph([(1, 2), (2, 3), (3, 4)]) + with patch.object(Config.CORPUS_STORAGE_MANAGER, "subcorpus_graph", return_value=expected_mdg): + mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn) + self.assertEqual(mdg, expected_mdg) - def test_get_custom_corpus_reff(self): - """ Retrieves possible citations for given URN. 
""" - CustomCorpusService.custom_corpora[4].text_parts = Mocks.text_parts - reff: List[str] = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-15]) - self.assertEqual(len(reff), 0) - McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) - reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-14]) - self.assertEqual(len(reff), 1) - McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) - reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9]) - self.assertEqual(len(reff), 2) - reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9]) - self.assertEqual(len(reff), 2) - McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY) + def test_get_graph_data(self): + """Sends annotated text data or a URN to the Corpus Storage Manager in order to get a graph.""" + with patch.object(mcserver.app.services.corpusService.CorpusService, "get_annotations_from_string", + return_value=Mocks.annotations): + with patch.object(mcserver.app.services.corpusService.CorpusService, "process_corpus_data", + return_value=Mocks.annis_response_dict): + result: dict = get_graph_data("", "", [], ExerciseType.matching, []) + self.assertEqual(result, Mocks.annis_response_dict) - def test_get_custom_corpus_text(self): - """ Retrieves the text for a custom corpus, e.g. a textbook. """ - text_list: List[ReferenceableText] = CustomCorpusService.get_custom_corpus_text(Mocks.urn) - self.assertEqual(len(text_list), 0) + def test_get_matches(self): + """Retrieves search results from ANNIS for a given corpus and AQL query.""" + disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom) + AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=Mocks.annotations, + file_name=disk_urn) + solutions: List[Solution] = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok'], + [Phenomenon.DEPENDENCY]) + self.assertEqual(len(solutions), 5) + solutions = CorpusService.get_matches(Mocks.urn_custom, ['upostag="VERB" ->dep tok'], + [Phenomenon.UPOSTAG, Phenomenon.DEPENDENCY]) + self.assertEqual(len(solutions), 5) + solutions = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok ->dep tok'], + [Phenomenon.DEPENDENCY, Phenomenon.UPOSTAG]) + self.assertEqual(len(solutions), 3) def test_get_pdf_html_string(self): """ Builds an HTML string from an exercise, e.g. to construct a PDF from it. """ @@ -1023,7 +904,7 @@ class CommonTestCase(unittest.TestCase): def test_get_raw_text(self): """ Retrieves the raw text for a corpus. """ with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response): - text: str = CorpusService.get_raw_text(Mocks.urn, True) + text: str = CorpusService.get_raw_text(Mocks.urn) self.assertEqual(len(text), 349) def test_get_solutions_by_index(self): @@ -1031,6 +912,13 @@ class CommonTestCase(unittest.TestCase): solutions: List[Solution] = TextService.get_solutions_by_index(Mocks.exercise, []) self.assertEqual(len(solutions), 1) + def test_get_subgraph(self): + """ Retrieves subgraph data for a given URN. """ + with patch.object(mcserver.app.services.annotationService.AnnotationService, "get_single_subgraph", + return_value=Mocks.graph_data): + ar: AnnisResponse = CorpusService.get_subgraph(Mocks.urn_custom, 'tok="quarum"', 0, 0) + self.assertEqual(len(ar.graph_data.nodes), len(Mocks.graph_data.nodes)) + def test_get_treebank_annotations(self): """ Retrieves annotations from a treebank. 
""" cache_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, @@ -1069,11 +957,10 @@ class CommonTestCase(unittest.TestCase): conll_string: str = AnnotationService.get_udpipe(text) self.assertIn(Mocks.udpipe_string, conll_string) - def test_init_custom_corpus(self): - """Adds custom corpora to the corpus list, e.g. the PROIEL corpora.""" - with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations): - cc: CustomCorpus = CustomCorpusService.init_custom_corpus(CustomCorpusService.custom_corpora[0]) - self.assertEqual(len(cc.text_parts), 1) + def test_handle_exercise_data(self): + """ Constructs an SVG image (for POS and syntactic dependencies) from given annotations. """ + result: str = handle_exercise_data(ExerciseData(json_dict=Mocks.exercise_data.serialize()), 5, 5) + self.assertTrue(result.startswith('<svg height="160" id="svg1" width="224">')) def test_init_db_alembic(self): """In Docker, the alembic version is not initially written to the database, so we need to set it manually.""" @@ -1108,16 +995,33 @@ class CommonTestCase(unittest.TestCase): clear_cache() stop_word_list: Dict[str, List[str]] = {"a": ["b"]} mr: MockResponse = MockResponse(json.dumps(stop_word_list)) - with patch.object(mcserver.app.services.textService.requests, "get", return_value=mr) as mock_get_request: + with patch.object(mcserver.app.services.textService.requests, "get", return_value=mr) as \ + mock_get_request: TextService.init_stop_words_latin() self.assertEqual(len(TextService.stop_words_latin), 1) TextService.init_stop_words_latin() clear_cache() self.assertEqual(mock_get_request.call_count, 1) + def test_init_updater(self): + """Initializes the corpus list updater.""" + with patch.object(CorpusService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")): + ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, + last_modified_time=1, created_time=1) + db.session.add(ui_cts) + DatabaseService.commit() + with patch.object(CorpusService, 'update_corpora') as update_mock: + t: Thread = start_updater(Mocks.app_dict[self.class_name].app) + self.assertIsInstance(t, Thread) + self.assertTrue(t.is_alive()) + time.sleep(0.1) + db.session.query(UpdateInfo).delete() + assert not update_mock.called + def test_is_match(self): """ Checks whether a given lemma is part of a reference vocabulary.""" self.assertTrue(TextService.is_match("neque", {"ne"})) + self.assertTrue(TextService.is_match("facile", {"facilis"})) def test_load_text_list(self): """ Loads the text list for a new corpus. """ @@ -1135,6 +1039,13 @@ class CommonTestCase(unittest.TestCase): text_parts = CorpusService.load_text_list(Mocks.urn) self.assertEqual(text_parts, []) + def test_log_exception(self): + """Logs errors that occur while the Flask app is working. """ + with patch.object(Mocks.app_dict[self.class_name].app.logger, "info") as mock_info: + with Mocks.app_dict[self.class_name].app.test_request_context("/?param=value"): + log_exception(Mocks.app_dict[self.class_name].app, ValueError()) + self.assertEqual(mock_info.call_count, 1) + def test_make_docx_file(self): """ Saves an exercise to a DOCX file (e.g. for later download). """ file_path: str = os.path.join(Config.TMP_DIRECTORY, "make_docx_file.docx") @@ -1169,9 +1080,21 @@ class CommonTestCase(unittest.TestCase): self.assertTrue(os.path.exists(df.file_path)) os.remove(df.file_path) + def test_map_conll_to_graph(self): + """ Saves an annotated corpus in CONLL format to the ANNIS corpus storage. 
""" + conll: List[TokenList] = Mocks.annotations + copy.deepcopy(Mocks.annotations) + conll[1].metadata = dict(sent_id="2") + disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom) + AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=conll, file_name=disk_urn) + result: dict = CorpusService.process_corpus_data( + urn=Mocks.urn_custom, annotations=conll, aqls=["tok"], exercise_type=ExerciseType.cloze, + search_phenomena=[Phenomenon.UPOSTAG]) + gd: GraphData = AnnotationService.map_graph_data(result["graph_data_raw"]) + self.assertEqual(gd.nodes[-1].id.split("/")[0], gd.nodes[0].id.split("/")[0]) + def test_map_graph_data(self): """Maps graph data to exercise data.""" - ed_expected: ExerciseData = Mocks.exercise_data + ed_expected: ExerciseData = ExerciseData(json_dict=Mocks.exercise_data.serialize()) node_expected: NodeMC = ed_expected.graph.nodes[0] node = {"id": node_expected.id, "annis::node_name": node_expected.annis_node_name, "annis::node_type": node_expected.annis_node_type, "annis::tok": node_expected.annis_tok, @@ -1208,6 +1131,8 @@ class CommonTestCase(unittest.TestCase): self.assertEqual(Choice(choice_dict).serialize(), choice_dict) xapi: XapiStatement = XapiStatement(json.loads(Mocks.xapi_json_string)["0"]) self.assertEqual(len(xapi.serialize().keys()), 5) + ed: ExerciseData = ExerciseData(json_dict=Mocks.exercise_data.serialize()) + self.assertEqual(len(ed.graph.nodes), len(Mocks.exercise_data.graph.nodes)) db.session.query(UpdateInfo).delete() session.make_transient(Mocks.corpora[0]) session.make_transient(Mocks.exercise) @@ -1255,10 +1180,10 @@ class CommonTestCase(unittest.TestCase): db.session.add_all(exercises) DatabaseService.commit() - with patch.object(mcserver.app.services.textComplexityService.requests, "post", - return_value=MockResponse(Mocks.text_complexity_json_string)): + with patch.object(mcserver.app.services.textComplexityService.TextComplexityService, "text_complexity", + return_value=Mocks.text_complexity): with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response): - ExerciseService.update_exercises(False) + ExerciseService.update_exercises() exercises = DatabaseService.query(Exercise) self.assertEqual(len(exercises), 1) self.assertEqual(exercises[0].text_complexity, 54.53) @@ -1269,7 +1194,7 @@ if __name__ == '__main__': runner: unittest.TextTestRunner = unittest.TextTestRunner() suite: unittest.TestSuite = unittest.TestSuite() suite.addTests(TestLoader().loadTestsFromTestCase(McTestCase)) - suite.addTests(TestLoader().loadTestsFromTestCase(CsmTestCase)) + suite.addTests(TestLoader().loadTestsFromTestCase(CorpusTestCase)) suite.addTests(TestLoader().loadTestsFromTestCase(CommonTestCase)) runner.run(suite) if os.path.exists(Config.GRAPH_DATABASE_DIR):