"""Unit tests for testing the application functionality."""
import copy
import ntpath
import os
import pickle
import shutil
import sys
import time
import unittest
import uuid
from collections import OrderedDict
from datetime import datetime
from threading import Thread
from typing import Dict, List, Tuple, Type, Any
from unittest import TestLoader
from unittest.mock import patch, MagicMock, mock_open
from zipfile import ZipFile
import MyCapytain
import rapidjson as json
from conllu import TokenList
from flask import Flask
from gensim.models import Word2Vec

from graphannis.errors import NoSuchCorpus
from lxml import etree
from networkx import MultiDiGraph, Graph
from requests import HTTPError

from sqlalchemy.exc import OperationalError, InvalidRequestError
from sqlalchemy.orm import session
from werkzeug.wrappers import Response
import mcserver
from mcserver.app import create_app, db, start_updater, full_init, log_exception, get_api_specification, \
init_app_common, create_postgres_database

from mcserver.app.api.exerciseAPI import map_exercise_data_to_database, get_graph_data
from mcserver.app.api.fileAPI import clean_tmp_folder
from mcserver.app.api.h5pAPI import get_remote_exercise

from mcserver.app.api.kwicAPI import handle_exercise_data
from mcserver.app.api.vocabularyAPI import check_vocabulary
from mcserver.app.api.zenodoAPI import remove_older_versions
from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseData, \
NodeMC, LinkMC, GraphData, Phenomenon, CustomCorpus, AnnisResponse, Solution, DownloadableFile, Language, \
VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, Choice, XapiStatement, ExerciseMC, \
CorpusMC, make_solution_element_from_salt_id, Sentence, ReferenceableText, Dependency, PartOfSpeech, Citation

from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, \
DatabaseService, XMLservice, TextService, FrequencyService, ExerciseService, TextComplexityService, NetworkService
from mcserver.config import TestingConfig, Config, DevelopmentConfig
from mcserver.models_auto import Corpus, Exercise, UpdateInfo, LearningResult
from mocks import Mocks, MockResponse, MockW2V, MockQuery, TestHelper
from openapi.openapi_server.models import VocabularyMC, MatchingExercise, ExerciseTypePath, TextComplexity, \
ZenodoRecord, RawTextFormExtension
from openapi.openapi_server.models.exercise_form import ExerciseForm
from openapi.openapi_server.models.h5_p_form import H5PForm
from openapi.openapi_server.models.kwic_form import KwicForm
from openapi.openapi_server.models.vector_network_form import VectorNetworkForm
from openapi.openapi_server.models.vocabulary_form import VocabularyForm
from openapi.openapi_server.models.zenodo_form import ZenodoForm
class APItestCase(unittest.TestCase):
"""The test suite for the REST API."""
def setUp(self):
"""Initializes the testing environment."""
self.start_time = time.time()
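# wipe any leftover graph database (presumably the graphannis corpus storage) so each test class starts from a clean state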
if os.path.exists(Config.GRAPH_DATABASE_DIR):
shutil.rmtree(Config.GRAPH_DATABASE_DIR)
self.class_name: str = str(self.__class__)
TestHelper.update_flask_app(self.class_name, create_app)
def tearDown(self):
"""Finishes testing by removing the traces."""
print("{0}: {1} seconds".format(self.id(), "%.2f" % (time.time() - self.start_time)))
@staticmethod
def clear_folder(folder_path: str):
""" Deletes every file in a folder. """
for f in [x for x in os.listdir(folder_path) if x != ".gitignore"]:
os.remove(os.path.join(folder_path, f))
def test_api_bad_request(self):
"""Returns validation errors as JSON."""
response: Response = Mocks.app_dict[self.class_name].client.get(Config.SERVER_URI_CORPORA)
self.assertEqual(response.status_code, 400)
def test_api_corpus_delete(self):
""" Deletes a single corpus. """
db.session.query(Corpus).delete()
response: Response = Mocks.app_dict[self.class_name].client.delete(
f"{Config.SERVER_URI_CORPORA}/1")
self.assertEqual(response.status_code, 404)
TestHelper.add_corpus(Mocks.corpora[0])
response = Mocks.app_dict[self.class_name].client.delete(f"{Config.SERVER_URI_CORPORA}/{Mocks.corpora[0].cid}")
data_json: dict = json.loads(response.get_data())
self.assertEqual(data_json, True)
db.session.query(Corpus).delete()
db.session.query(UpdateInfo).delete()
# dirty hack so we can reuse it in other tests
session.make_transient(Mocks.corpora[0])
def test_api_corpus_get(self):
""" Gets information about a single corpus. """
response: Response = Mocks.app_dict[self.class_name].client.get(
f"{Config.SERVER_URI_CORPORA}/{Mocks.corpora[0].cid}")
self.assertEqual(response.status_code, 404)
TestHelper.add_corpus(Mocks.corpora[0])
response: Response = Mocks.app_dict[self.class_name].client.get(
f"{Config.SERVER_URI_CORPORA}/{Mocks.corpora[0].cid}")
data_json: dict = json.loads(response.get_data())
old_dict: dict = Mocks.corpora[0].to_dict()
self.assertEqual(data_json, old_dict)
db.session.query(Corpus).delete()
db.session.query(UpdateInfo).delete()
# dirty hack so we can reuse it in other tests
session.make_transient(Mocks.corpora[0])
def test_api_corpus_list_get(self):
"""Adds multiple texts to the database and queries them all."""
def expect_result(self: APItestCase, mock: MagicMock, lut: str, result: Any,
lmt: datetime = datetime.utcnow()):
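# local helper: mock the UpdateInfo lookup, query the corpus list endpoint with the given last_update_time and compare the JSON payload against the expected result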
ui: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name,
last_modified_time=lmt.timestamp(), created_time=1.0)
mock.session.query.return_value = MockQuery(ui)

response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_CORPORA, query_string=dict(last_update_time=lut))
data_json = json.loads(response.get_data())
if data_json:
result = [x.to_dict() for x in result]
self.assertEqual(data_json, result)

with patch.object(mcserver.app.services.databaseService, "db") as mock_db:
expect_result(self, mock_db, str(int(datetime.utcnow().timestamp() * 1000)), None,
datetime.fromtimestamp(0))
db.session.add_all(Mocks.corpora)
DatabaseService.commit()
expect_result(self, mock_db, "0", Mocks.corpora, datetime.fromtimestamp(time.time()))
db.session.query(Corpus).delete()
db.session.query(UpdateInfo).delete()
# dirty hack so we can reuse it in other tests
session.make_transient(Mocks.corpora[0])
def test_api_corpus_patch(self):
""" Changes information about a single corpus. """
response: Response = Mocks.app_dict[self.class_name].client.patch(
f"{Config.SERVER_URI_CORPORA}/{Mocks.corpora[0].cid}")
self.assertEqual(response.status_code, 404)
TestHelper.add_corpus(Mocks.corpora[0])
old_title: str = Mocks.corpora[0].title
new_title: str = "new_title"
response: Response = Mocks.app_dict[self.class_name].client.patch(
f"{Config.SERVER_URI_CORPORA}/{Mocks.corpora[0].cid}", data=dict(title=new_title))
data_json: dict = json.loads(response.get_data())
old_dict: dict = Mocks.corpora[0].to_dict()
self.assertEqual(data_json["title"], old_dict["title"])
Mocks.corpora[0].title = old_title
db.session.query(Corpus).delete()
db.session.query(UpdateInfo).delete()
# dirty hack so we can reuse it in other tests
session.make_transient(Mocks.corpora[0])
def test_api_exercise_get(self):
""" Retrieves an existing exercise by its exercise ID. """
ar_copy: AnnisResponse = Mocks.copy(Mocks.annis_response, AnnisResponse)
db.session.query(Exercise).delete()

response: Response = Mocks.app_dict[self.class_name].client.get(
Config.SERVER_URI_EXERCISE, query_string=dict(eid=""))
self.assertEqual(response.status_code, 404)
old_urn: str = Mocks.exercise.urn
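# temporarily clear the URN: with no graph data available, the endpoint is expected to respond with 404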
Mocks.exercise.urn = ""
db.session.add(Mocks.exercise)
DatabaseService.commit()
mock_ar: AnnisResponse = AnnisResponse(solutions=[], graph_data=GraphData(links=[], nodes=[]))
with patch.object(CorpusService, "get_corpus", side_effect=[mock_ar, ar_copy]):
response = Mocks.app_dict[self.class_name].client.get(Config.SERVER_URI_EXERCISE,
query_string=dict(eid=Mocks.exercise.eid))
self.assertEqual(response.status_code, 404)
Mocks.exercise.urn = old_urn
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(Config.SERVER_URI_EXERCISE,
query_string=dict(eid=Mocks.exercise.eid))

ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True)))
self.assertEqual(len(ar.graph_data.nodes), 52)
db.session.query(Exercise).delete()
session.make_transient(Mocks.exercise)
def test_api_exercise_list_get(self):
""" Retrieves a list of available exercises. """
ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name,
last_modified_time=1.0, created_time=1.0)
db.session.add(ui_exercises)
DatabaseService.commit()
args: dict = dict(lang="fr", last_update_time=int(time.time()))
response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_EXERCISE_LIST,
query_string=args)
self.assertEqual(json.loads(response.get_data()), [])
args["last_update_time"] = 0
db.session.add(Mocks.exercise)
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_EXERCISE_LIST, query_string=args)
exercises: List[MatchingExercise] = []
for exercise_dict in json.loads(response.get_data(as_text=True)):
exercise_dict["search_values"] = json.dumps(exercise_dict["search_values"])
exercise_dict["solutions"] = json.dumps(exercise_dict["solutions"])
exercises.append(MatchingExercise.from_dict(exercise_dict))
self.assertEqual(len(exercises), 1)
args = dict(lang=Language.English.value, vocabulary=VocabularyCorpus.agldt.name, frequency_upper_bound=500)
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_EXERCISE_LIST, query_string=args)
exercises: List[dict] = json.loads(response.get_data())
self.assertTrue(exercises[0]["matching_degree"])
db.session.query(Exercise).delete()
db.session.query(UpdateInfo).delete()
session.make_transient(Mocks.exercise)

def test_api_exercise_post(self):
""" Creates a new exercise from scratch. """
db.session.query(UpdateInfo).delete()
ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name,
last_modified_time=1.0, created_time=1.0)

db.session.add(ui_exercises)
DatabaseService.commit()
ef: ExerciseForm = ExerciseForm(urn=Mocks.exercise.urn, type=ExerciseType.matching.value,
search_values=Mocks.exercise.search_values, instructions='abc')
with patch.object(mcserver.app.api.exerciseAPI, "get_graph_data") as mock_ggd:
graph_data_mock: dict = Mocks.annis_response_dict
graph_data_mock["graph_data"] = Mocks.copy(Mocks.graph_data, GraphData)
mock_ggd.side_effect = [graph_data_mock, ValueError()]

with patch.object(mcserver.app.services.textComplexityService.TextComplexityService,
"text_complexity", return_value=Mocks.text_complexity):
response: Response = Mocks.app_dict[self.class_name].client.post(
Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict())
ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True)))
self.assertEqual(len(ar.solutions), 3)
response = Mocks.app_dict[self.class_name].client.post(
Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=ef.to_dict())
self.assertEqual(response.status_code, 500)
Mocks.app_dict[self.class_name].app_context.push()
db.session.query(UpdateInfo).delete()
def test_api_file_get(self):
"""Gets an existing exercise"""
ui_file: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.file_api_clean.name,
last_modified_time=1.0, created_time=1.0)
DatabaseService.commit()
# create a fake old file, to be deleted on the next GET request
FileService.create_tmp_file(FileType.XML, "old")
args: dict = dict(type=FileType.XML, id=Mocks.exercise.eid, solution_indices=[0])
response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_FILE,
query_string=args)
self.assertEqual(response.status_code, 404)
file_path: str = os.path.join(Config.TMP_DIRECTORY, Mocks.exercise.eid + "." + FileType.XML)
file_content: str = "<xml></xml>"
with open(file_path, "w+") as f:
f.write(file_content)
ui_file.last_modified_time = datetime.utcnow().timestamp()
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_FILE, query_string=args)
self.assertEqual(response.get_data(as_text=True), file_content)
# add the mapped exercise to the database
db.session.add(Mocks.exercise)
DatabaseService.commit()
args["type"] = FileType.PDF
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_FILE, query_string=args)
# the PDFs are not deterministically reproducible because the creation date etc. is written into them
self.assertTrue(response.data.startswith(Mocks.exercise_pdf))
db.session.query(Exercise).delete()
session.make_transient(Mocks.exercise)
def test_api_file_post(self):
""" Posts exercise data to be saved temporarily or permanently on the server, e.g. for downloading. """
learning_result: str = Mocks.xapi_json_string
Mocks.app_dict[self.class_name].client.post(TestingConfig.SERVER_URI_FILE, headers=Mocks.headers_form_data,

data=dict(learning_result=learning_result))

lrs: List[LearningResult] = DatabaseService.query(LearningResult)
self.assertEqual(len(lrs), 1)
data_dict: dict = dict(file_type=FileType.XML, urn=Mocks.urn_custom, html_content="<html></html>")

response: Response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_FILE, headers=Mocks.headers_form_data, data=data_dict)
file_name = json.loads(response.data.decode("utf-8"))
self.assertTrue(file_name.endswith(".xml"))
os.remove(os.path.join(Config.TMP_DIRECTORY, file_name))
LearningResult.query.delete()
def test_api_frequency_get(self):
""" Requests a frequency analysis for a given URN. """

expected_fa: List[FrequencyItem] = [
FrequencyItem(values=[Dependency.object.name], phenomena=[Phenomenon.DEPENDENCY], count=1),
FrequencyItem(values=[PartOfSpeech.adjective.name], phenomena=[Phenomenon.UPOSTAG], count=1)]
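# the first mocked call returns a trivial single-item analysis, the second one the expected dependency/POS frequencies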
with patch.object(
mcserver.app.services.corpusService.CorpusService, "get_frequency_analysis",
side_effect=[[FrequencyItem(values=[], phenomena=[], count=0)], expected_fa]):
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_FREQUENCY, query_string=dict(urn=Mocks.urn_custom))
result_list: List[dict] = json.loads(response.get_data(as_text=True))
fa: List[FrequencyItem] = [FrequencyItem.from_dict(x) for x in result_list]
self.assertEqual(len(fa), 1)

response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_FREQUENCY, query_string=dict(urn=Mocks.urn_custom))
result_list: List[dict] = json.loads(response.get_data(as_text=True))
fa: List[FrequencyItem] = [FrequencyItem.from_dict(x) for x in result_list]
self.assertEqual(fa[0].values, expected_fa[0].values)
self.assertEqual(fa[1].values[0], None)
def test_api_h5p_get(self):
""" Requests a H5P JSON file for a given exercise. """

response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_H5P, query_string=dict(eid=Config.EXERCISE_ID_TEST, lang=Language.English.value))
self.assertIn(Mocks.h5p_json_cloze[1:-1], response.get_data(as_text=True))

args: dict = dict(eid=Mocks.exercise.eid, lang=Language.English.value, solution_indices=[0])

response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertEqual(response.status_code, 404)
db.session.add(Mocks.exercise)
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertIn(Mocks.h5p_json_cloze[1:-1], response.get_data(as_text=True))
Mocks.exercise.exercise_type = ExerciseType.kwic.value
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertEqual(response.status_code, 422)
Mocks.exercise.exercise_type = ExerciseType.matching.value
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertIn(Mocks.h5p_json_matching[1:-1], response.get_data(as_text=True))
Mocks.exercise.exercise_type = ExerciseType.cloze.value
args["lang"] = "fr"
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertIn(Mocks.h5p_json_cloze[1:-1], response.get_data(as_text=True))
db.session.query(Exercise).delete()
session.make_transient(Mocks.exercise)
response = NetworkService.make_json_response(dict())
with patch.object(mcserver.app.api.h5pAPI, "get_remote_exercise", return_value=response) as get_mock:
args["eid"] = f".{FileType.H5P}"
Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_H5P, query_string=args)
self.assertEqual(get_mock.call_count, 1)
def test_api_h5p_post(self):
"""The POST method for the H5P REST API. It offers client-side H5P exercises for download as ZIP archives."""
exercise: Exercise = copy.deepcopy(Mocks.exercise)
hf: H5PForm = H5PForm(eid=Mocks.exercise.eid, exercise_type_path=ExerciseTypePath.DRAG_TEXT,
lang=Language.English.value, solution_indices=[])
response: Response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_H5P, headers=Mocks.headers_form_data, data=hf.to_dict())
self.assertEqual(response.status_code, 404)
db.session.add(exercise)
DatabaseService.commit()
response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_H5P, headers=Mocks.headers_form_data, data=hf.to_dict())

self.assertEqual(len(response.get_data()), 1940145)
with patch.object(mcserver.app.api.h5pAPI, "get_text_field_content", return_value=""):
response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_H5P, headers=Mocks.headers_form_data, data=hf.to_dict())
self.assertEqual(response.status_code, 422)
db.session.query(Exercise).delete()
def test_api_kwic_post(self):
""" Posts an AQL query to create a KWIC visualization in SVG format. """
gd: GraphData = Mocks.copy(Mocks.graph_data, GraphData)

ed1: ExerciseData = ExerciseService.map_graph_data_to_exercise(
gd, "", [Solution(target=make_solution_element_from_salt_id(
'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159692tok1'))])

ed2: ExerciseData = ExerciseService.map_graph_data_to_exercise(
gd, "", [Solution(target=make_solution_element_from_salt_id(
'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159695tok10'))])
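# keep only the later nodes for the second exercise, presumably so that it covers a different sentence of the mock graph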
ed2.graph.nodes = ed2.graph.nodes[42:]
mr: MockResponse = MockResponse(json.dumps([ed1.serialize(), ed2.serialize()]))

kf: KwicForm = KwicForm(ctx_left=5, ctx_right=5, search_values=Mocks.exercise.search_values,
urn=Mocks.urn_custom)
with patch.object(mcserver.app.services.corpusService.requests, "post", return_value=mr):
response: Response = Mocks.app_dict[self.class_name].client.post(

TestingConfig.SERVER_URI_KWIC, headers=Mocks.headers_form_data, data=kf.to_dict())
self.assertTrue(response.data.startswith(Mocks.kwic_svg))
def test_api_not_found(self):
"""Checks the 404 response in case of an invalid API query URL."""
response: Response = Mocks.app_dict[self.class_name].client.get("/")
self.assertEqual(response.status_code, 404)

def test_api_raw_text_get(self):
""" Retrieves the raw text for a given URN. """

with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus") as mock_get_corpus:
with patch.object(mcserver.app.services.textComplexityService.TextComplexityService,
"text_complexity", return_value=Mocks.text_complexity):
mock_get_corpus.return_value = AnnisResponse(
graph_data=GraphData(links=[], nodes=[]), solutions=[])
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_RAW_TEXT, query_string=dict(urn=Mocks.urn_custom))
self.assertEqual(response.status_code, 404)
mock_get_corpus.return_value = Mocks.annis_response
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT,
query_string=dict(urn=Mocks.urn_custom))
ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True)))
self.assertEqual(len(ar.graph_data.nodes), 52)
ar_copy: AnnisResponse = Mocks.copy(Mocks.annis_response, AnnisResponse)

ar_copy.graph_data.nodes = []
mock_get_corpus.return_value = ar_copy
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_RAW_TEXT,
query_string=dict(urn=Mocks.urn_custom))
self.assertEqual(response.status_code, 404)
def test_api_raw_text_post(self):
""" Provides annotations and text complexity for arbitrary Latin texts. """
rtfe: RawTextFormExtension = RawTextFormExtension(Mocks.raw_text)
with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string):
response: Response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_RAW_TEXT, headers=Mocks.headers_form_data, data=rtfe.to_dict())
ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data()))
self.assertEqual(len(ar.graph_data.nodes), 5)
""" Retrieves static exercises from the frontend and publishes deep URLs for each one of them. """
exercises: List[Tuple[str, str, str]] = [
(ExerciseTypePath.FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_1,
(ExerciseTypePath.FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_3,
(ExerciseTypePath.FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_4,
(ExerciseTypePath.FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_13,
(ExerciseTypePath.DRAG_TEXT, "1_en", Mocks.h5p_json_cloze),
(ExerciseTypePath.MULTI_CHOICE, "1_en", Mocks.h5p_json_multi_choice),
(ExerciseTypePath.MULTI_CHOICE, "2_en", Mocks.h5p_json_multi_choice_2),
(ExerciseTypePath.MULTI_CHOICE,) + Mocks.h5p_json_multi_choice_9,
(ExerciseTypePath.VOC_LIST, "1_en", Mocks.h5p_json_voc_list)]
paths: List[str] = []
for exercise in exercises:
file_name: str = exercise[1] + ".json"
file_path: str = os.path.join(Config.TMP_DIRECTORY, exercise[0], "content", file_name)
os.makedirs(os.path.split(file_path)[0], exist_ok=True)
with open(file_path, "w+") as f:
json.dump(json.loads(exercise[2]), f)
paths.append(file_path)
with ZipFile(TestingConfig.STATIC_EXERCISES_ZIP_FILE_PATH, "w") as z:
for path in paths:
z.write(path)
for exercise in exercises:
shutil.rmtree(os.path.join(Config.TMP_DIRECTORY, exercise[0]), ignore_errors=True)
with open(TestingConfig.STATIC_EXERCISES_ZIP_FILE_PATH, "rb") as f:
zip_content: bytes = f.read()
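# the first mocked GET fails (ok=False) and should yield a 503 response; the second one delivers the zipped exercises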
with patch.object(
mcserver.app.api.staticExercisesAPI.requests, "get", side_effect=[
MockResponse("{}", ok=False), MockResponse("{}", content=zip_content)]):
with patch.object(AnnotationService, "get_udpipe",
return_value=Mocks.static_exercises_udpipe_string) as mock_udpipe:
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_STATIC_EXERCISES)
self.assertEqual(response.status_code, 503)
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_STATIC_EXERCISES)
os.remove(TestingConfig.STATIC_EXERCISES_ZIP_FILE_PATH)
self.assertGreater(len(response.get_data(as_text=True)), 1900)
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_STATIC_EXERCISES)
self.assertEqual(mock_udpipe.call_count, 1)

def test_api_text_complexity_get(self):
""" Calculates text complexity measures for a given URN. """
gd: GraphData = Mocks.copy(Mocks.graph_data, GraphData)
tc: TextComplexity = TextComplexityService.text_complexity("n_w", Mocks.urn_custom, gd)

self.assertEqual(tc.n_w, 52)
with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus",
return_value=Mocks.annis_response):
with patch.object(mcserver.app.services.textComplexityService.TextComplexityService,
"text_complexity", return_value=Mocks.text_complexity):
args: dict = dict(urn=Mocks.urn_custom, measure=TextComplexityMeasure.all.name)
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_TEXT_COMPLEXITY, query_string=args)
self.assertEqual(response.get_data(as_text=True), json.dumps(Mocks.text_complexity.to_dict()))
def test_api_valid_reff_get(self):
""" Retrieves possible citations for a given URN. """
with patch.object(MyCapytain.retrievers.cts5.requests, "get", side_effect=TestHelper.cts_get_mock):
args: dict = dict(urn=Mocks.urn_custom[:-14])
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_VALID_REFF, query_string=args)
self.assertEqual(len(json.loads(response.data.decode("utf-8"))), 3)
APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
args["urn"] = f"{Mocks.urn_custom[:-13]}4"
response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_VALID_REFF, query_string=args)
self.assertEqual(response.status_code, 404)
APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
args["urn"] = f"{Mocks.urn_custom[:-13]}abc"
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_VALID_REFF,
query_string=args)
self.assertEqual(response.status_code, 400)
APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
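# a simulated HTTP error from the CTS API should result in an empty list of references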
TestingConfig.SIMULATE_HTTP_ERROR = True
self.assertEqual(len(CorpusService.get_standard_corpus_reff(Mocks.urn[:-8])), 0)
TestingConfig.SIMULATE_HTTP_ERROR = False
reff: List[str] = CorpusService.get_standard_corpus_reff(Mocks.urn[:-8])
self.assertEqual(len(reff), 7)
def test_api_vector_network_get(self):
""" Builds a network of semantically similar vectors for a given list of words. """
with patch.object(mcserver.app.api.vectorNetworkAPI, "add_edges", side_effect=Mocks.mock_add_eges):
with patch.object(mcserver.app.api.vectorNetworkAPI.Word2Vec, "load", return_value=MockW2V()):
args: dict = dict(search_regex='ueritas', nearest_neighbor_count=150, min_count=6)
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_VECTOR_NETWORK, query_string=args)

svg_string: str = json.loads(response.get_data(as_text=True))
self.assertGreater(len(svg_string), 6500)
def test_api_vector_network_post(self):
""" Returns contexts that are semantically similar to a given query. """
mock_data: str = "This is a sentence.\nAnd here is yet another one.\n"
with patch("mcserver.app.api.vectorNetworkAPI.open", mock_open(read_data=mock_data)):
with patch.object(mcserver.app.api.vectorNetworkAPI.Word2Vec, "load", return_value=MockW2V()):

vnf: VectorNetworkForm = VectorNetworkForm(search_regex='uera', nearest_neighbor_count=10)
response: Response = Mocks.app_dict[self.class_name].client.post(

TestingConfig.SERVER_URI_VECTOR_NETWORK, headers=Mocks.headers_form_data, data=vnf.to_dict())
self.assertEqual(len(json.loads(response.get_data(as_text=True))), 2)

def test_api_vocabulary_get(self):
""" Retrieves sentence ID and matching degree for each sentence in the query text. """
vf: VocabularyForm = VocabularyForm(query_urn="", vocabulary=VocabularyMC.AGLDT, frequency_upper_bound=6000)
response: Response = Mocks.app_dict[self.class_name].client.get(
TestingConfig.SERVER_URI_VOCABULARY, query_string=vf.to_dict())
self.assertEqual(response.status_code, 404)
with patch.object(mcserver.app.services.CorpusService, "get_corpus",
return_value=Mocks.copy(Mocks.annis_response, AnnisResponse)):
vf.query_urn = Mocks.urn_custom
response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_VOCABULARY,
query_string=vf.to_dict())
sentences: List[Sentence] = [Sentence.from_dict(x) for x in json.loads(response.get_data(as_text=True))]
self.assertEqual(sentences[0].matching_degree, 90.9090909090909)


def test_api_vocabulary_post(self):

""" Indicates for each token of a corpus whether it is covered by a reference vocabulary. """

with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus",
return_value=Mocks.annis_response):

vf: VocabularyForm = VocabularyForm(frequency_upper_bound=500, query_urn=Mocks.urn_custom,
vocabulary=VocabularyMC.AGLDT)
response: Response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_VOCABULARY, data=vf.to_dict())
ar: AnnisResponse = AnnisResponse.from_dict(json.loads(response.get_data(as_text=True)))
self.assertTrue(NodeMC.from_dict(ar.graph_data.nodes[3].to_dict()).is_oov)

def test_api_zenodo_get(self):
""" Provides exercise materials from the Zenodo repository."""
mock_records: List[Any] = [MagicMock()]
title: str = "my_title"
mock_records[0].metadata = dict(title=[title])
with patch.object(mcserver.app.api.zenodoAPI.Sickle, "ListRecords", return_value=mock_records):
response: Response = Mocks.app_dict[self.class_name].client.get(TestingConfig.SERVER_URI_ZENODO)
records: List[ZenodoRecord] = [ZenodoRecord.from_dict(x) for x in
json.loads(response.get_data(as_text=True))]
self.assertEqual(records[0].title, [title])

def test_api_zenodo_post(self):
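""" Retrieves resource URIs for a given record in the Zenodo repository. """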
zf: ZenodoForm = ZenodoForm(record_id=4548959)
record_mock: MagicMock = MagicMock()
uris: List[str] = ["http"]
record_mock.metadata = dict(subfield=uris)
with patch.object(mcserver.app.api.zenodoAPI.Sickle, "GetRecord", return_value=record_mock):
response: Response = Mocks.app_dict[self.class_name].client.post(
TestingConfig.SERVER_URI_ZENODO, data=zf.to_dict())
self.assertEqual(json.loads(response.get_data()), uris)
def test_create_app(self):
"""Creates a new Flask application and configures it. Initializes the application and the database."""
with patch.object(sys, "argv", [None, None, Config.FLASK_MIGRATE]):
with patch.object(mcserver.app, "init_app_common", return_value=Flask(__name__)):
cfg: Type[Config] = TestingConfig
old_uri: str = cfg.SQLALCHEMY_DATABASE_URI
create_app(cfg)
self.assertEqual(cfg.SQLALCHEMY_DATABASE_URI, Config.DATABASE_URL_LOCAL)
cfg.SQLALCHEMY_DATABASE_URI = old_uri
Mocks.app_dict[self.class_name].app_context.push()
def test_get_favicon(self):
"""Sends the favicon to browsers, which is used, e.g., in the tabs as a symbol for our application."""
response: Response = Mocks.app_dict[self.class_name].client.get(Config.SERVER_URI_FAVICON)
with open(os.path.join(Config.ASSETS_DIRECTORY, Config.FAVICON_FILE_NAME), "rb") as f:
content: bytes = f.read()
data_received: bytes = response.get_data()
self.assertEqual(content, data_received)
class CommonTestCase(unittest.TestCase):
""" Test suite for general functions. """
def setUp(self):
"""Initializes the testing environment."""
self.start_time = time.time()
self.class_name: str = str(self.__class__)

TestHelper.update_flask_app(self.class_name, create_app)
def tearDown(self):
"""Finishes testing by removing the traces."""
print("{0}: {1} seconds".format(self.id(), "%.2f" % (time.time() - self.start_time)))
def test_add_dependency_frequencies(self):
""" Performs a frequency analysis for dependency annotations in a corpus. """
gd_copy: GraphData = Mocks.copy(Mocks.graph_data, GraphData)

gd_copy.links[0].udep_deprel = "safebpfw"
gd_copy.links[48].udep_deprel = "fkonürwür"
fis: List[FrequencyItem] = FrequencyService.add_dependency_frequencies(gd_copy, [])
self.assertEqual(len(fis), 134)
def test_add_edges(self):
"""Adds edges to an existing graph based on a list of keys and constraints to their similarity and frequency."""
from mcserver.app.api.vectorNetworkAPI import add_edges
w2v: Word2Vec = Word2Vec([x.split() for x in Mocks.raw_text.split(". ")], min_count=1, sample=0)
graph: Graph = Graph()
add_edges(["fortis"], w2v, 4, 1, graph)
self.assertGreater(len(graph.edges), 1)
def test_add_urn_to_sentences(self):
""" Adds the relevant URN for every annotated sentence. """
conll: List[TokenList] = copy.deepcopy(Mocks.annotations)

text_list: List[ReferenceableText] = [
ReferenceableText(conll[0].tokens[0]["form"], Mocks.urn),
ReferenceableText("", Mocks.urn_custom), ReferenceableText(conll[0].tokens[0]["form"], Mocks.urn_custom)]
conll[0].tokens[0]["form"] += "."
conll.append(TokenList(tokens=[
{"id": 1, "form": "Caesar.", "lemma": "Caeso", "upostag": "VERB", "xpostag": "L3|modJ|tem3|gen4|stAV",
"feats": {"Mood": "Ind", "Number": "Sing", "Person": "1", "Tense": "Fut", "VerbForm": "Fin",
"Voice": "Pass"}, "head": 0, "deprel": "root", "deps": None, "misc": {"ref": "1.1"}}],
metadata=OrderedDict([("sent_id", "2"), ("urn", "")])))

conll += copy.deepcopy(Mocks.annotations)
AnnotationService.add_urn_to_sentences(text_list, conll)
self.assertEqual(conll[0].metadata["urn"], Mocks.urn)
self.assertEqual(conll[1].metadata["urn"], "")

self.assertEqual(conll[2].metadata["urn"], Mocks.urn_custom)

def test_check_vocabulary(self):
""" Checks whether the lemmata of a given graph/text match a reference vocabulary. """
gd_copy: GraphData = Mocks.copy(Mocks.graph_data, GraphData)
new_node: NodeMC = Mocks.copy(gd_copy.nodes[0], NodeMC)

new_node.id = "salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159693tok1"
gd_copy.nodes.append(new_node)
sentences: List[Sentence] = check_vocabulary(gd_copy, {""})

self.assertEqual(sentences[1].matching_degree, 3.225806451612903)
def test_clean_tmp_folder(self):
""" Cleans the files directory regularly. """
dir_path: str = os.path.join(Config.TMP_DIRECTORY, "test")
os.makedirs(dir_path)
self.assertTrue(os.path.exists(dir_path))
self.assertTrue(os.path.isdir(dir_path))
ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.file_api_clean.name,
last_modified_time=1.0, created_time=1.0)
db.session.add(ui_cts)
DatabaseService.commit()
clean_tmp_folder()
db.session.query(UpdateInfo).delete()
self.assertFalse(os.path.exists(dir_path))
def test_create_xml_string(self):
"""Exports the exercise data to the Moodle XML format. See https://docs.moodle.org/35/en/Moodle_XML_format ."""
xml_string: str = XMLservice.create_xml_string(
ExerciseMC.from_dict(
exercise_type=ExerciseType.matching.value, last_access_time=0.0, eid=str(uuid.uuid4())),
[], FileType.PDF, [])
self.assertEqual(xml_string, Mocks.exercise_xml)

def test_find_matches(self):
""" Finds matches for a given URN and AQL and returns the corresponding node IDs. """
expected_matches: List[str] = ["a", "b"]
with patch.object(Config.CORPUS_STORAGE_MANAGER, "find",
side_effect=[[expected_matches], NoSuchCorpus(""), [expected_matches]]):
matches: List[str] = CorpusService.find_matches(Mocks.urn_custom[:-6] + "3.1.1", "tok")
self.assertEqual(matches, expected_matches)
with patch.object(mcserver.app.services.corpusService.CorpusService, "get_corpus"):
matches = CorpusService.find_matches(Mocks.urn, "")
self.assertEqual(matches, expected_matches)
def test_full_init(self):
""" Fully initializes the application, including logging."""
Mocks.app_dict[self.class_name].app.config["TESTING"] = False
with patch.object(CorpusService, "init_graphannis_logging"):
with patch.object(mcserver.app, "start_updater") as updater_mock:
full_init(Mocks.app_dict[self.class_name].app)
self.assertEqual(updater_mock.call_count, 1)
Mocks.app_dict[self.class_name].app.config["TESTING"] = True
db.session.query(UpdateInfo).delete()
def test_get_annotations_from_string(self):
""" Gets annotation data from a given string, be it a CoNLL string or a corpus URN. """
conll: List[TokenList]
with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string):
with patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list):
with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text):
conll = CorpusService.get_annotations_from_string(Mocks.urn)
self.assertEqual(len(conll[0]), 5)
mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn)
self.assertEqual(len(mdg.nodes), 5)
mdg = CorpusService.get_graph(f"{Mocks.urn}@1-1")
self.assertEqual(len(mdg.nodes), 5)
with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
conll = CorpusService.get_annotations_from_string(Mocks.urn_custom)
self.assertEqual(len(conll[0]), 6)
with patch.object(CustomCorpusService, "get_custom_corpus_annotations",
return_value=Mocks.annotations * 2):
urn: str = f"{Config.CUSTOM_CORPUS_VIVA_URN}:1.1-1.1"
conll = CorpusService.get_annotations_from_string(urn)
self.assertEqual(len(conll), 2)
def test_get_api_specification(self):
""" Reads, parses and caches the OpenAPI specification including all shared models. """
spec: dict = get_api_specification()
os.remove(Config.API_SPEC_CACHE_PATH)
with patch.object(mcserver.app.prance.ResolvingParser, "parse"):
get_api_specification()
self.assertTrue(os.path.exists(Config.API_SPEC_CACHE_PATH))
# restore the old cache, so that other unit tests can be initialized faster
api_last_modified_time: float = os.path.getmtime(Config.API_SPEC_MCSERVER_FILE_PATH)
models_last_modified_time: float = os.path.getmtime(Config.API_SPEC_MODELS_YAML_FILE_PATH)
lmt: float = api_last_modified_time + models_last_modified_time
with open(Config.API_SPEC_CACHE_PATH, "wb+") as f:
pickle.dump(dict(lmt=lmt, spec=spec), f)
def test_get_concept_network(self):
"""Extracts a network of words from vector data in an AI model."""
from mcserver.app.api.vectorNetworkAPI import get_concept_network
with patch.object(mcserver.app.api.vectorNetworkAPI, "add_edges", side_effect=Mocks.mock_add_eges):
with patch.object(mcserver.app.api.vectorNetworkAPI.Word2Vec, "load", return_value=MockW2V()):
svg_string: str = get_concept_network("ueritas", highlight_regex_string="uera")
self.assertGreater(len(svg_string), 6500)

def test_get_frequency_analysis(self):
""" Collects frequency statistics for various combinations of linguistic annotations in a corpus. """
with patch.object(
mcserver.app.services.corpusService.CorpusService, "get_frequency_analysis",
return_value=[FrequencyItem(values=[], phenomena=[], count=0)]):
fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn=Mocks.urn_custom)
self.assertEqual(len(fa), 1)
CorpusService.get_corpus(Mocks.urn_custom)
with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
fa = CorpusService.get_frequency_analysis(Mocks.urn_custom)

self.assertEqual(len(fa), 214)

def test_get_graph(self):
""" Retrieves a graph from the cache or, if not there, builds it from scratch. """
expected_mdg: MultiDiGraph = MultiDiGraph([(1, 2), (2, 3), (3, 4)])
with patch.object(Config.CORPUS_STORAGE_MANAGER, "subcorpus_graph", return_value=expected_mdg):
mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn)
self.assertEqual(mdg, expected_mdg)

def test_get_graph_data(self):
"""Sends annotated text data or a URN to the Corpus Storage Manager in order to get a graph."""
with patch.object(mcserver.app.services.corpusService.CorpusService, "get_annotations_from_string",
return_value=Mocks.annotations):
with patch.object(mcserver.app.services.corpusService.CorpusService, "process_corpus_data",
return_value=Mocks.annis_response_dict):
result: dict = get_graph_data("", "", [], ExerciseType.matching, [])
self.assertEqual(result, Mocks.annis_response_dict)

def test_get_matches(self):
"""Retrieves search results from ANNIS for a given corpus and AQL query."""
disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=Mocks.annotations,
file_name=disk_urn)
solutions: List[Solution] = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok'],
[Phenomenon.DEPENDENCY])
self.assertEqual(len(solutions), 5)
solutions = CorpusService.get_matches(Mocks.urn_custom, ['upostag="VERB" ->dep tok'],
[Phenomenon.UPOSTAG, Phenomenon.DEPENDENCY])
self.assertEqual(len(solutions), 5)
solutions = CorpusService.get_matches(Mocks.urn_custom, ['tok ->dep tok ->dep tok'],
[Phenomenon.DEPENDENCY, Phenomenon.UPOSTAG])
self.assertEqual(len(solutions), 3)
def test_get_pdf_html_string(self):
""" Builds an HTML string from an exercise, e.g. to construct a PDF from it. """
Mocks.exercise.exercise_type = ExerciseType.matching.value
solutions: List[Solution] = [Solution.from_dict(x) for x in json.loads(Mocks.exercise.solutions)]
result: str = FileService.get_pdf_html_string(Mocks.exercise, Mocks.annotations, FileType.PDF, solutions)

expected_result: str = '<br><p>Cloze: Assign the words from the pool to the correct gaps!</p><p><table><tr><td>praecepturus</td><td>Caesar</td></tr></table></p>'
self.assertEqual(result, expected_result)
Mocks.exercise.exercise_type = ExerciseType.markWords.value
result = FileService.get_pdf_html_string(Mocks.exercise, Mocks.annotations, FileType.PDF, solutions)

expected_result = '<p>Cloze: Assign the words from the pool to the correct gaps!</p><p>Caesar et Galli fortes sunt.</p><br><br>'
self.assertEqual(result, expected_result)
Mocks.exercise.exercise_type = ExerciseType.cloze.value
def test_get_raw_text(self):
""" Retrieves the raw text for a corpus. """
with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):

text: str = CorpusService.get_raw_text(Mocks.urn)
def test_get_remote_exercise(self):
""" Retrieves an H5P archive from a remote location and builds a JSON template for it."""
class ZipMock:
def open(self, *args, **kwargs):
open_mock: MagicMock = MagicMock()
open_mock.read.return_value = '{"mainLibrary": "lib"}'
return open_mock
response_mock: MagicMock = MagicMock()
response_mock.content = b""
with patch.object(mcserver.app.api.h5pAPI.requests, "get", return_value=response_mock):
with patch.object(mcserver.app.api.h5pAPI, "ZipFile", return_value=ZipMock):
response: Response = get_remote_exercise("")
h5p_dict: dict = json.loads(response.get_data())
self.assertEqual(h5p_dict["mainLibrary"], "lib")
def test_get_solutions_by_index(self):
""" If available, makes use of the solution indices to return only the wanted solutions. """
solutions: List[Solution] = TextService.get_solutions_by_index(Mocks.exercise, [])
self.assertEqual(len(solutions), 1)

def test_get_subgraph(self):
""" Retrieves subgraph data for a given URN. """
gd: GraphData = Mocks.copy(Mocks.graph_data, GraphData)

with patch.object(mcserver.app.services.annotationService.AnnotationService, "get_single_subgraph",

ar: AnnisResponse = CorpusService.get_subgraph(Mocks.urn_custom, 'tok="quarum"', 0, 0)
self.assertEqual(len(ar.graph_data.nodes), len(Mocks.graph_data.nodes))
def test_get_treebank_annotations(self):
""" Retrieves annotations from a treebank. """
cache_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY,
ntpath.basename(CustomCorpusService.custom_corpora[4].file_path) + ".pickle")
old_annotations: List[TokenList] = []
if os.path.exists(cache_path):
with open(cache_path, "rb") as f:
old_annotations = pickle.load(f)
os.remove(cache_path)
with patch.object(mcserver.app.services.customCorpusService.conllu, "parse",
return_value=Mocks.annotations) as parse_mock:
with patch.object(CustomCorpusService, "get_treebank_sub_annotations", return_value=Mocks.annotations):
conll: List[TokenList] = CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
self.assertIs(conll, Mocks.annotations)
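# a corrupted cache (pickle.load raising ValueError) should force a second parse of the treebank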
with patch.object(mcserver.app.services.customCorpusService.pickle, "load", side_effect=ValueError):

CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
self.assertEqual(parse_mock.call_count, 2)
# restore the old cache so that other unit tests do not operate on fake data
if old_annotations:
with open(cache_path, "wb+") as f:
pickle.dump(old_annotations, f)
def test_get_treebank_sub_annotations(self):
""" Retrieves annotations for nested parts of a treebank. """

annotations: List[TokenList] = Mocks.annotations + \
[TokenList([], metadata=OrderedDict([("sent_id", "2")])),
TokenList([], metadata=OrderedDict([("sent_id", "3")]))]
conll: List[TokenList] = CustomCorpusService.get_treebank_sub_annotations(
Mocks.urn + "@1-3", annotations, CustomCorpusService.custom_corpora[4])
self.assertEqual(len(conll), 3)
cc: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(Config.CUSTOM_CORPUS_VIVA_URN)
urn: str = cc.corpus.source_urn + ":1.1-1.2"

CustomCorpusService.get_treebank_sub_annotations(urn, [], cc)
self.assertEqual(len(cc.text_parts), 2)

conll = CustomCorpusService.get_treebank_sub_annotations(cc.corpus.source_urn + "I.1-I.2", [], cc)
self.assertEqual(conll, [])
def test_get_udpipe(self):
"""Annotates a single text with UdPipe."""
# The beginning of the CONLL has to be left out because it contains the randomly generated temp file path
# and thus cannot be predicted exactly.

text: str = "Caesar fortis est. Galli moriuntur."
conll_string: str = AnnotationService.get_udpipe(text)
self.assertIn(Mocks.udpipe_string, conll_string)

def test_handle_exercise_data(self):
""" Constructs an SVG image (for POS and syntactic dependencies) from given annotations. """
result: str = handle_exercise_data(ExerciseData(json_dict=Mocks.exercise_data.serialize()), 5, 5)
self.assertTrue(result.startswith('<svg height="160" id="svg1" width="224">'))
def test_imports(self):
""" Checks whether all necessary imports are available. """
success: bool = False
try:
import blinker # for signalling
import coverage # for code coverage in unit tests
from docx import Document # for exercise exports
from flask_cors import CORS # for HTTP requests
from flask_migrate import Migrate # for database migrations
from flask_restful import Api # for construction of the API
import gunicorn # for production server
from open_alchemy import init_yaml # for validation of data according to the API specification
import psycopg2 # for database access via SQLAlchemy
import rapidjson as json # for faster (de-)serialization using JSON
success = True
except ModuleNotFoundError:
pass
self.assertTrue(success)
def test_init_app_common(self):
""" Initializes common Flask parts, e.g. CORS, configuration, database, migrations and custom corpora."""
old_uri: str = DevelopmentConfig.SQLALCHEMY_DATABASE_URI
DevelopmentConfig.SQLALCHEMY_DATABASE_URI = Config.POSTGRES_STRING
with patch.object(mcserver.app.services.databaseService.DatabaseService, "init_db_alembic") as init_mock:
with patch.object(mcserver.app, "full_init") as full_init_mock:
with patch.object(mcserver.app, "db"):
with patch.object(mcserver.app, "migrate"):
with patch.object(mcserver.app, "database_exists", return_value=False):
with patch.object(mcserver.app, "create_postgres_database") as create_mock:
init_app_common(DevelopmentConfig)
self.assertEqual(init_mock.call_count, 1)
self.assertEqual(full_init_mock.call_count, 1)
self.assertEqual(create_mock.call_count, 1)
DevelopmentConfig.SQLALCHEMY_DATABASE_URI = old_uri
Mocks.app_dict[self.class_name].app_context.push()
def test_init_stop_words_latin(self):
"""Initializes the stop words list for Latin texts and caches it if necessary."""
def clear_cache():
if os.path.exists(Config.STOP_WORDS_LATIN_PATH):
os.remove(Config.STOP_WORDS_LATIN_PATH)
clear_cache()
stop_word_list: Dict[str, List[str]] = {"a": ["b"]}
mr: MockResponse = MockResponse(json.dumps(stop_word_list))

with patch.object(mcserver.app.services.textService.requests, "get", return_value=mr) as \
mock_get_request:
TextService.init_stop_words_latin()
self.assertEqual(len(TextService.stop_words_latin), 1)
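# the second call should be served from the cached stop word list, so no additional HTTP request is made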
TextService.init_stop_words_latin()
self.assertEqual(mock_get_request.call_count, 1)

def test_is_match(self):
""" Checks whether a given lemma is part of a reference vocabulary."""
self.assertTrue(TextService.is_match("neque", {"ne"}))

self.assertTrue(TextService.is_match("facile", {"facilis"}))

def test_load_text_list(self):
""" Loads the text list for a new corpus. """
with patch.object(mcserver.app.services.corpusService.HttpCtsRetriever, 'getPassage',
return_value=Mocks.cts_passage_xml) as get_passage_mock:

text_parts: List[ReferenceableText] = CorpusService.load_text_list(Mocks.urn)
self.assertEqual(len(text_parts), 2)
get_passage_mock.return_value = Mocks.cts_passage_xml_2_levels
text_parts = CorpusService.load_text_list(Mocks.urn[:-8] + "-1.1")
self.assertEqual(len(text_parts), 1)
get_passage_mock.return_value = Mocks.cts_passage_xml_1_level
text_parts = CorpusService.load_text_list(Mocks.urn[:-10] + "-3")
self.assertEqual(len(text_parts), 3)
get_passage_mock.side_effect = HTTPError()

text_parts = CorpusService.load_text_list(Mocks.urn)
self.assertEqual(text_parts, [])

def test_log_exception(self):
"""Logs errors that occur while the Flask app is working. """
with patch.object(Mocks.app_dict[self.class_name].app.logger, "info") as mock_info:
with Mocks.app_dict[self.class_name].app.test_request_context("/?param=value"):
log_exception(Mocks.app_dict[self.class_name].app, ValueError())
self.assertEqual(mock_info.call_count, 1)
def test_make_docx_file(self):
""" Saves an exercise to a DOCX file (e.g. for later download). """
file_path: str = os.path.join(Config.TMP_DIRECTORY, "make_docx_file.docx")
solutions: List[Solution] = [Solution.from_dict(x) for x in json.loads(Mocks.exercise.solutions)]
FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)

self.assertEqual(os.path.getsize(file_path), 36647)
Mocks.exercise.exercise_type = ExerciseType.markWords.value
FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)

self.assertEqual(os.path.getsize(file_path), 36637)
Mocks.exercise.exercise_type = ExerciseType.matching.value
FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)

self.assertEqual(os.path.getsize(file_path), 36757)
Mocks.exercise.exercise_type = ExerciseType.cloze.value
os.remove(file_path)
def test_make_tmp_file_from_exercise(self):
""" Creates a temporary file from a given exercise, e.g. for downloading. """
df: DownloadableFile = FileService.make_tmp_file_from_exercise(FileType.XML, Mocks.exercise, [0])
self.assertTrue(os.path.exists(df.file_path))
os.remove(df.file_path)
df: DownloadableFile = FileService.make_tmp_file_from_exercise(FileType.DOCX, Mocks.exercise, [0])
self.assertTrue(os.path.exists(df.file_path))
os.remove(df.file_path)
def test_make_tmp_file_from_html(self):
""" Creates a temporary file from a given HTML string, e.g. for downloading. """
html: str = "<html lang='la'><p>test</p><span class='tok'><u>abc</u></span></html>"
df: DownloadableFile = FileService.make_tmp_file_from_html(Mocks.urn_custom, FileType.PDF, html)
self.assertTrue(os.path.exists(df.file_path))
os.remove(df.file_path)
df: DownloadableFile = FileService.make_tmp_file_from_html(Mocks.urn_custom, FileType.DOCX, html)
self.assertTrue(os.path.exists(df.file_path))
os.remove(df.file_path)

def test_map_conll_to_graph(self):
""" Saves an annotated corpus in CONLL format to the ANNIS corpus storage. """
conll: List[TokenList] = Mocks.annotations + copy.deepcopy(Mocks.annotations)
conll[1].metadata = dict(sent_id="2")
disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=conll, file_name=disk_urn)
result: dict = CorpusService.process_corpus_data(
urn=Mocks.urn_custom, annotations=conll, aqls=["tok"], exercise_type=ExerciseType.cloze,
search_phenomena=[Phenomenon.UPOSTAG])
gd: GraphData = result["graph_data"]

self.assertEqual(gd.nodes[-1].id.split("/")[0], gd.nodes[0].id.split("/")[0])
def test_map_graph_data(self):
"""Maps graph data to exercise data."""

ed_expected: ExerciseData = ExerciseData(json_dict=Mocks.exercise_data.serialize())
node_expected: NodeMC = ed_expected.graph.nodes[0]
node = {"id": node_expected.id, "annis::node_name": node_expected.annis_node_name,
"annis::node_type": node_expected.annis_node_type, "annis::tok": node_expected.annis_tok,
"annis::type": node_expected.annis_type, "udep::feats": node_expected.udep_feats,
"udep::lemma": node_expected.udep_lemma, "udep::upostag": node_expected.udep_upostag,
"udep::xpostag": node_expected.udep_xpostag}
link_expected: LinkMC = ed_expected.graph.links[0]
link = {"source": link_expected.source, "target": link_expected.target,