# Web-view residue (not part of the module): "Skip to content", "Snippets Groups Projects", "Newer Older".
# File: tests.py (65.6 KiB)
"""Unit tests for testing the application functionality."""
# TODO: TEST BLINKER, GUNICORN, PSYCOPG2-BINARY AND COVERAGE IMPORTS
import copy
import logging
import ntpath
import os
from collections import OrderedDict
from threading import Thread
from unittest.mock import patch, MagicMock, mock_open
from zipfile import ZipFile

import rapidjson as json
import re
import shutil
import string
import sys
import time
import unittest
from multiprocessing import Process
from unittest import TestLoader
from datetime import datetime
from typing import Dict, List, Tuple, Type

from conllu import TokenList
from flask import Flask
from flask.testing import FlaskClient
from gensim.models import Word2Vec
from lxml import etree
from networkx import MultiDiGraph, Graph
from requests import HTTPError
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import session
from werkzeug.exceptions import NotFound
from werkzeug.wrappers import Response

import csm
import mcserver
from csm.app import create_csm_app
from mcserver.app import create_app, db, start_updater, full_init
from mcserver.app.api.exerciseAPI import map_exercise_data_to_database
from mcserver.app.api.vectorNetworkAPI import add_edges, get_concept_network
from mcserver.app.models import Corpus, UpdateInfo, ResourceType, Exercise, FileType, ExerciseType, ExerciseData, \
    NodeMC, LinkMC, GraphData, Phenomenon, CustomCorpus, AnnisResponse, Solution, DownloadableFile, Language, \
    VocabularyCorpus, SolutionElement, TextComplexityMeasure, FrequencyAnalysis, CitationLevel, FrequencyItem, \
    LearningResult, TextComplexity, Dependency, PartOfSpeech, Category, Choice, Actor, XapiStatement
from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, DatabaseService, \
    XMLservice, TextService
from mcserver.config import TestingConfig, Config
from mocks import Mocks, MockResponse, MockW2V


class McTestCase(unittest.TestCase):
    """The test suite for the main application."""

    def mocked_requests_get(*args, **kwargs):
        """Replacement for ``requests.get`` that answers from the canned data in ``Mocks``.

        The ``TestingConfig.SIMULATE_*`` switches are evaluated first, in a fixed order. Only when none of
        them is set does the requested URL (the first positional argument) select the response.
        """
        if TestingConfig.SIMULATE_CORPUS_NOT_FOUND:
            # answer with the attributes of an AnnisResponse that was built without any arguments
            return MockResponse(json.dumps(AnnisResponse().__dict__))
        if TestingConfig.SIMULATE_HTTP_ERROR:
            raise HTTPError
        if TestingConfig.SIMULATE_EMPTY_GRAPH:
            # shallow copy, so the shared mock dictionary keeps its own node list
            empty_graph: dict = dict(Mocks.annis_response_dict["graph_data_raw"])
            empty_graph.update(nodes=[])
            return MockResponse(json.dumps(empty_graph))
        target: str = args[0]
        if target == Config.CTS_API_BASE_URL:
            wants_capabilities: bool = kwargs['params']['request'] == 'GetCapabilities'
            return MockResponse(Mocks.cts_capabilities_xml if wants_capabilities else Mocks.cts_reff_xml)
        if target.endswith(Config.SERVER_URI_CSM_SUBGRAPH):
            return MockResponse(json.dumps(Mocks.annis_response_dict))
        # any other URL receives the mapped and serialized mock graph data
        mapped_graph: GraphData = AnnotationService.map_graph_data(Mocks.annis_response_dict["graph_data_raw"])
        return MockResponse(json.dumps(mapped_graph.serialize()))

    def mocked_requests_post(*args, **kwargs):
        """Replacement for ``requests.post`` that answers from the canned data in ``Mocks``.

        URLs ending with the text complexity URI receive the mock text complexity JSON string. Every other
        URL receives a JSON list of two serialized exercise data objects, the second one with its graph
        nodes cut down to the slice starting at index 42.
        """
        if args[0].endswith(Config.SERVER_URI_TEXT_COMPLEXITY):
            return MockResponse(Mocks.text_complexity_json_string)
        salt_ids: Tuple[str, str] = (
            'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159692tok1',
            'salt:/urn:custom:latinLit:proiel.pal-agr.lat:1.1.1/doc1#sent159695tok10')
        # one exercise data object per solution target, built in the order of the salt IDs above
        first, second = [
            AnnotationService.map_graph_data_to_exercise(
                Mocks.annis_response_dict["graph_data_raw"], "",
                [Solution(target=SolutionElement(salt_id=salt_id))])
            for salt_id in salt_ids]
        second.graph.nodes = second.graph.nodes[42:]
        return MockResponse(json.dumps([first.serialize(), second.serialize()]))

    def setUp(self):
        """Prepares a fresh Flask app, app context and test client before every test."""
        self.start_time = time.time()
        # start from a clean graph database directory
        if os.path.exists(Config.GRAPH_DATABASE_DIR):
            shutil.rmtree(Config.GRAPH_DATABASE_DIR)
        # replace the stop word initialization with a mock for the duration of the test
        stop_words_patch = patch.object(TextService, "init_stop_words_latin")
        self.addCleanup(stop_words_patch.stop)
        stop_words_patch.start()
        self.app: Flask = create_app(TestingConfig)
        self.app.logger.setLevel(logging.CRITICAL)
        self.assertIsInstance(self.app, Flask)
        self.app_context = self.app.app_context()
        self.app_context.push()
        self.app.testing = True
        self.client: FlaskClient = self.app.test_client()
        self.assertIsInstance(self.client, FlaskClient)
        # remove leftover rows from both tables
        for model in (UpdateInfo, Corpus):
            model.query.delete()

    def tearDown(self):
        """Drops the database, leaves the app context and prints how long the test took."""
        db.session.remove()
        db.drop_all()
        self.app_context.pop()
        test_id: str = self.id()
        elapsed: str = "%.2f" % (time.time() - self.start_time)
        print("{0}: {1} seconds".format(test_id, elapsed))

    @staticmethod
    def add_corpus(corpus: Corpus):
        """ Stores a corpus and replaces all update infos by a single, current one for the CTS data. """
        db.session.add(corpus)
        UpdateInfo.query.delete()
        db.session.add(UpdateInfo(resource_type=ResourceType.cts_data, last_modified_time=datetime.utcnow()))
        db.session.commit()

    @staticmethod
    def clear_folder(folder_path: str):
        """ Deletes every file in a folder. """
        for f in [x for x in os.listdir(folder_path) if x != ".gitignore"]:
            os.remove(os.path.join(folder_path, f))

    def test_api_corpus_delete(self):
        """ Deletes a single corpus: 404 while it is not stored, a positive result once it is. """
        corpus: Corpus = Mocks.corpora[0]
        not_found: Response = self.client.delete(corpus.uri)
        self.assertEqual(not_found.status_code, 404)
        McTestCase.add_corpus(corpus)
        deleted: Response = self.client.delete(corpus.uri)
        self.assertEqual(json.loads(deleted.get_data()), {"result": True})
        Corpus.query.delete()
        UpdateInfo.query.delete()
        # dirty hack so we can reuse it in other tests
        session.make_transient(corpus)

    def test_api_corpus_get(self):
        """ Fetches a single corpus: 404 while it is not stored, the corpus itself once it is. """
        corpus: Corpus = Mocks.corpora[0]
        not_found: Response = self.client.get(corpus.uri)
        self.assertEqual(not_found.status_code, 404)
        McTestCase.add_corpus(corpus)
        found: Response = self.client.get(corpus.uri)
        payload: dict = json.loads(found.get_data())
        # rebuild a corpus from the response to compare it with the stored one
        self.assertEqual(Corpus(json_dict=payload["corpus"]), corpus)
        Corpus.query.delete()
        UpdateInfo.query.delete()
        # dirty hack so we can reuse it in other tests
        session.make_transient(corpus)

    def test_api_corpus_list_get(self):
        """Adds multiple texts to the database and queries them all.

        ``UpdateInfo`` is patched inside the corpus list API, so that the result of
        ``UpdateInfo.query.filter_by(...).first()`` can be controlled (or made to raise) by the test.
        """

        class MockFilterBy:
            """Stand-in for the return value of ``filter_by``; only ``first`` is provided."""

            def __init__(self, do_raise: bool = False, ui: UpdateInfo = None):
                self.do_raise: bool = do_raise
                self.ui: UpdateInfo = ui

            def first(self):
                """Returns the configured update info or raises an OperationalError, if requested."""
                if self.do_raise:
                    raise OperationalError("error", [], "")
                return self.ui

        def expect_result(self: McTestCase, mock: MagicMock, do_raise: bool, lut: str, result: object,
                          lmt: datetime = None):
            """Requests the corpus list and compares the parsed response to the expected result.

            :param mock: the patched ``UpdateInfo`` of the corpus list API
            :param do_raise: whether ``first()`` should raise an OperationalError
            :param lut: value for the ``last_update_time`` query parameter
            :param result: expected outcome, i.e. a list of corpora or the raw (falsy) response
            :param lmt: last modified time of the mocked update info; defaults to the current UTC time
            """
            # resolve the default at call time instead of freezing it when the function is defined
            if lmt is None:
                lmt = datetime.utcnow()
            ui: UpdateInfo = UpdateInfo(resource_type=ResourceType.cts_data, last_modified_time=lmt)
            mfb: MockFilterBy = MockFilterBy(do_raise, ui)
            mock.query.filter_by.return_value = mfb
            response: Response = self.client.get(TestingConfig.SERVER_URI_CORPORA,
                                                 query_string=dict(last_update_time=lut))
            data_json = json.loads(response.get_data())
            if data_json:
                data_json = [Corpus(json_dict=x) for x in data_json["corpora"]]
            self.assertEqual(data_json, result)

        with patch.object(mcserver.app.api.corpusListAPI, "UpdateInfo") as mock_update_info:
            expect_result(self, mock_update_info, True, "0", None)
            expect_result(self, mock_update_info, False, str(int(datetime.utcnow().timestamp() * 1000)), None,
                          datetime.fromtimestamp(0))
            db.session.add_all(Mocks.corpora)
            db.session.commit()
            expect_result(self, mock_update_info, False, "0", Mocks.corpora, datetime.fromtimestamp(time.time()))
        Corpus.query.delete()
        UpdateInfo.query.delete()
        # dirty hack so we can reuse them in other tests; every corpus was added above, so reset all of them
        for corpus in Mocks.corpora:
            session.make_transient(corpus)

    def test_api_corpus_put(self):
        """ Changes information about a single corpus.

        A PUT for an unknown corpus has to yield 404. Once the corpus is stored, the title in the response
        has to be the newly submitted one.
        """
        corpus: Corpus = Mocks.corpora[0]
        response: Response = self.client.put(corpus.uri)
        self.assertEqual(response.status_code, 404)
        McTestCase.add_corpus(corpus)
        old_title: str = corpus.title
        new_title: str = "new_title"
        try:
            response = self.client.put(corpus.uri, data=dict(title=new_title))
            data_json: dict = json.loads(response.get_data())
            new_corpus: Corpus = Corpus(json_dict=data_json["corpus"])
            # compare with the submitted value: comparing with the mock's own title would also pass
            # if the update had silently been ignored
            self.assertEqual(new_corpus.title, new_title)
        finally:
            # restore the shared mock even if an assertion failed, so other tests are not affected
            corpus.title = old_title
            Corpus.query.delete()
            UpdateInfo.query.delete()
            # dirty hack so we can reuse it in other tests
            session.make_transient(corpus)

    def test_api_exercise_get(self):
        """ Fetches an exercise by its ID: unknown ID, stored exercise without URN, stored exercise with URN. """
        exercise_uri: str = Config.SERVER_URI_EXERCISE
        response: Response = self.client.get(exercise_uri, query_string=dict(eid=""))
        self.assertEqual(response.status_code, 404)
        exercise: Exercise = Mocks.exercise
        original_urn: str = exercise.urn
        # store the exercise with an empty URN first
        exercise.urn = ""
        db.session.add(exercise)
        db.session.commit()
        # get_corpus returns an empty AnnisResponse on its first call and the mock response on its second
        corpus_results: list = [AnnisResponse(), Mocks.annis_response]
        with patch.object(CorpusService, "get_corpus", side_effect=corpus_results):
            response = self.client.get(exercise_uri, query_string=dict(eid=exercise.eid))
            self.assertEqual(response.status_code, 404)
            exercise.urn = original_urn
            db.session.commit()
            response = self.client.get(exercise_uri, query_string=dict(eid=exercise.eid))
            annis_response: AnnisResponse = AnnisResponse(json_dict=json.loads(response.data.decode("utf-8")))
            self.assertEqual(len(annis_response.nodes), 52)
            Exercise.query.delete()
            session.make_transient(exercise)
            self.app_context.push()

    def test_api_exercise_list_get(self):
        """ Requests the exercise list: empty at first, then with one stored exercise, then with vocabulary. """
        db.session.add(UpdateInfo(resource_type=ResourceType.exercise_list,
                                  last_modified_time=datetime.fromtimestamp(1)))
        db.session.commit()
        list_uri: str = TestingConfig.SERVER_URI_EXERCISE_LIST
        params: dict = dict(lang="fr", last_update_time=int(time.time()))
        response: Response = self.client.get(list_uri, query_string=params)
        self.assertEqual(json.loads(response.get_data()), [])
        params["last_update_time"] = 0
        db.session.add(Mocks.exercise)
        db.session.commit()
        response = self.client.get(list_uri, query_string=params)
        parsed_exercises: List[Exercise] = [Exercise(json_dict=item) for item in json.loads(response.get_data())]
        self.assertEqual(len(parsed_exercises), 1)
        # a vocabulary request has to add a truthy matching degree to the listed exercise
        params = dict(lang=Language.English.value, vocabulary=VocabularyCorpus.agldt.name, frequency_upper_bound=500)
        response = self.client.get(list_uri, query_string=params)
        raw_exercises: List[dict] = json.loads(response.get_data())
        self.assertTrue(raw_exercises[0]["matching_degree"])
        Exercise.query.delete()
        session.make_transient(Mocks.exercise)

    def test_api_file_get(self):
        """ Requests files from the file API: a missing file, an existing XML file and a generated PDF. """
        file_uri: str = TestingConfig.SERVER_URI_FILE
        ui_file: UpdateInfo = UpdateInfo(resource_type=ResourceType.file_api_clean,
                                         last_modified_time=datetime.fromtimestamp(1))
        db.session.add(ui_file)
        db.session.commit()
        # create a fake old file, to be deleted on the next GET request
        FileService.create_tmp_file(FileType.xml, "old")
        params: dict = dict(type=FileType.xml.value, id=Mocks.exercise.eid, solution_indices="[0]")
        response: Response = self.client.get(file_uri, query_string=params)
        self.assertEqual(response.status_code, 404)
        # write the requested XML file by hand, so the next request can find it
        xml_name: str = Mocks.exercise.eid + "." + FileType.xml.value
        xml_path: str = os.path.join(Config.TMP_DIRECTORY, xml_name)
        xml_content: str = "<xml></xml>"
        with open(xml_path, "w+") as xml_file:
            xml_file.write(xml_content)
        ui_file.last_modified_time = datetime.utcnow()
        db.session.commit()
        del ui_file
        response = self.client.get(file_uri, query_string=params)
        os.remove(xml_path)
        self.assertEqual(response.data.decode("utf-8"), xml_content)
        # add the mapped exercise to the database
        db.session.add(Mocks.exercise)
        db.session.commit()
        params["type"] = FileType.pdf.value
        response = self.client.get(file_uri, query_string=params)
        # the PDFs are not deterministically reproducible because the creation date etc. is written into them
        self.assertTrue(response.data.startswith(Mocks.exercise_pdf))
        Exercise.query.delete()
        session.make_transient(Mocks.exercise)

    def test_api_file_post(self):
        """ Posts a learning result and, afterwards, HTML content that is to be saved as an XML file. """
        file_uri: str = TestingConfig.SERVER_URI_FILE
        form_headers = Mocks.headers_form_data
        # first request: one learning result has to be stored
        self.client.post(file_uri, headers=form_headers, data=dict(learning_result=Mocks.xapi_json_string))
        stored_results: List[LearningResult] = LearningResult.query.all()
        self.assertEqual(len(stored_results), 1)
        # second request: the response has to name a newly created XML file
        file_request: dict = dict(file_type=FileType.xml.name, urn=Mocks.urn_custom, html_content="<html></html>")
        response: Response = self.client.post(file_uri, headers=form_headers, data=file_request)
        created_name = json.loads(response.data.decode("utf-8"))
        self.assertTrue(created_name.endswith(".xml"))
        os.remove(os.path.join(Config.TMP_DIRECTORY, created_name))
        LearningResult.query.delete()

    def test_api_frequency_get(self):
        """ Requests a frequency analysis for a URN, with the corpus service answering one frequency item. """
        requests_module = mcserver.app.services.corpusService.requests
        single_item: MockResponse = MockResponse(
            json.dumps([FrequencyItem(values=[], phenomena=[], count=[]).serialize()]))
        with patch.object(requests_module, "get", return_value=single_item):
            response: Response = self.client.get(TestingConfig.SERVER_URI_FREQUENCY,
                                                 query_string=dict(urn=Mocks.urn_custom))
            items: List[dict] = json.loads(response.data.decode("utf-8"))
            analysis: FrequencyAnalysis = FrequencyAnalysis(json_list=items)
            self.assertEqual(len(analysis), 1)

    def test_api_h5p_get(self):
        """ Requests H5P JSON for an exercise while switching its exercise type and the language. """
        params: dict = dict(eid=Mocks.exercise.eid, lang=Language.English.value, solution_indices="[0]")

        def request_h5p() -> Response:
            # always sends the current state of the parameters
            return self.client.get(TestingConfig.SERVER_URI_H5P, query_string=params)

        self.assertEqual(request_h5p().status_code, 404)
        db.session.add(Mocks.exercise)
        db.session.commit()
        self.assertIn(Mocks.h5p_json_cloze[1:-1], request_h5p().data.decode("utf-8"))
        # the KWIC type is answered with status 422
        Mocks.exercise.exercise_type = ExerciseType.kwic.value
        db.session.commit()
        self.assertEqual(request_h5p().status_code, 422)
        Mocks.exercise.exercise_type = ExerciseType.matching.value
        db.session.commit()
        self.assertIn(Mocks.h5p_json_matching[1:-1], request_h5p().data.decode("utf-8"))
        # back to the cloze type, this time requested with "fr" as language
        Mocks.exercise.exercise_type = ExerciseType.cloze.value
        params["lang"] = "fr"
        self.assertIn(Mocks.h5p_json_cloze[1:-1], request_h5p().data.decode("utf-8"))
        Exercise.query.delete()
        session.make_transient(Mocks.exercise)

    @patch('mcserver.app.api.kwicAPI.requests.post', side_effect=mocked_requests_post)
    def test_api_kwic_post(self, mock_post: MagicMock):
        """ Posts search values and a URN to the KWIC API and checks the beginning of the returned data. """
        form_data: dict = dict(search_values=Mocks.exercise.search_values, urn=Mocks.urn_custom)
        kwic_response: Response = self.client.post(
            TestingConfig.SERVER_URI_KWIC, headers=Mocks.headers_form_data, data=form_data)
        self.assertTrue(kwic_response.data.startswith(Mocks.kwic_svg))

    def test_api_not_found(self):
        """Requesting the root URL has to raise a NotFound exception."""
        self.assertRaises(NotFound, self.client.get, "/")

    # patch injects mocks bottom-up, so the decorator order mirrors the argument order (post first, get second)
    @patch('mcserver.app.services.corpusService.requests.get', side_effect=mocked_requests_get)
    @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post)
    def test_api_raw_text_get(self, mock_post_tcs: MagicMock, mock_get_cs: MagicMock):
        """ Retrieves the raw text for a given URN.

        Covers three cases: a simulated missing corpus (404), the regular mock data (52 nodes) and a
        simulated empty graph (404).
        """
        raw_text_uri: str = TestingConfig.SERVER_URI_RAW_TEXT
        query: dict = dict(urn=Mocks.urn_custom)
        try:
            TestingConfig.SIMULATE_CORPUS_NOT_FOUND = True
            response: Response = self.client.get(raw_text_uri, query_string=query)
            self.assertEqual(response.status_code, 404)
            TestingConfig.SIMULATE_CORPUS_NOT_FOUND = False
            response = self.client.get(raw_text_uri, query_string=query)
            self.assertEqual(len(json.loads(response.data.decode("utf-8"))["nodes"]), 52)
            TestingConfig.SIMULATE_EMPTY_GRAPH = True
            response = self.client.get(raw_text_uri, query_string=query)
            self.assertEqual(response.status_code, 404)
        finally:
            # the switches live on the shared config class; reset them even if an assertion failed
            TestingConfig.SIMULATE_CORPUS_NOT_FOUND = False
            TestingConfig.SIMULATE_EMPTY_GRAPH = False

    # (web-view residue, not code) Konstantin Schulz's avatar / Konstantin Schulz committed
    def test_api_static_exercises_get(self):
        """ Retrieves static exercises from the frontend and publishes deep URLs for each one of them.

        The test writes the mock H5P JSON files to the temporary directory, zips them and serves the zip
        content through a patched ``requests.get`` of the static exercises API.
        """
        exercises: List[Tuple[str, str, str]] = [
            (Config.H5P_FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_1,
            (Config.H5P_FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_3,
            (Config.H5P_FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_4,
            (Config.H5P_FILL_BLANKS,) + Mocks.h5p_json_fill_blanks_13,
            (Config.H5P_DRAG_TEXT, "1_en", Mocks.h5p_json_cloze),
            (Config.H5P_MULTI_CHOICE, "1_en", Mocks.h5p_json_multi_choice),
            (Config.H5P_MULTI_CHOICE, "2_en", Mocks.h5p_json_multi_choice_2),
            (Config.H5P_MULTI_CHOICE,) + Mocks.h5p_json_multi_choice_9,
            (Config.H5P_VOC_LIST, "1_en", Mocks.h5p_json_voc_list)]
        zip_path: str = TestingConfig.STATIC_EXERCISES_ZIP_FILE_PATH
        paths: List[str] = []
        for exercise in exercises:
            file_name: str = exercise[1] + ".json"
            file_path: str = os.path.join(Config.TMP_DIRECTORY, exercise[0], "content", file_name)
            os.makedirs(os.path.split(file_path)[0], exist_ok=True)
            # close the file right away, so its content is on disk before it gets zipped
            with open(file_path, "w+") as json_file:
                json.dump(json.loads(exercise[2]), json_file)
            paths.append(file_path)
        with ZipFile(zip_path, "w") as z:
            for path in paths:
                z.write(path)
        for exercise in exercises:
            shutil.rmtree(os.path.join(Config.TMP_DIRECTORY, exercise[0]), ignore_errors=True)
        try:
            with open(zip_path, "rb") as zip_file:
                zip_content: bytes = zip_file.read()
            # the first mocked response is not ok (expected status: 503), the second one delivers the zip
            with patch.object(mcserver.app.api.staticExercisesAPI.requests, "get",
                              side_effect=[MockResponse("{}", ok=False), MockResponse("{}", content=zip_content)]):
                with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.static_exercises_udpipe_string):
                    response: Response = self.client.get(TestingConfig.SERVER_URI_STATIC_EXERCISES)
                    self.assertEqual(response.status_code, 503)
                    response = self.client.get(TestingConfig.SERVER_URI_STATIC_EXERCISES)
                    self.assertGreater(len(response.data.decode("utf-8")), 1900)
        finally:
            # remove the zip file even if one of the assertions failed
            if os.path.exists(zip_path):
                os.remove(zip_path)
    @patch('mcserver.app.services.corpusService.requests.get', side_effect=mocked_requests_get)
    def test_api_subgraph_get(self, mock_get: MagicMock):
        """ Requests a subgraph from the corpus service and counts the solutions of the result. """
        subgraph: AnnisResponse = CorpusService.get_subgraph(Mocks.urn_custom, 'tok="quarum"', 0, 0, False)
        solution_count: int = len(subgraph.solutions)
        self.assertEqual(solution_count, 3)

    # patch injects mocks bottom-up, so the decorator order mirrors the argument order (get first, post second)
    @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post)
    @patch('mcserver.app.services.corpusService.requests.get', side_effect=mocked_requests_get)
    def test_api_text_complexity_get(self, mock_get: MagicMock, mock_post: MagicMock):
        """ Calculates text complexity measures for a given URN.

        Requests all measures first and compares the response to the mock JSON string, then requests
        the single measure "n_w" and expects the value 52.
        """
        args: dict = dict(urn=Mocks.urn_custom, measure=TextComplexityMeasure.all.name)
        response: Response = self.client.get(TestingConfig.SERVER_URI_TEXT_COMPLEXITY, query_string=args)
        self.assertEqual(response.data.decode("utf-8"), Mocks.text_complexity_json_string)
        args["measure"] = "n_w"
        response = self.client.get(TestingConfig.SERVER_URI_TEXT_COMPLEXITY, query_string=args)
        self.assertEqual(json.loads(response.data.decode("utf-8"))["n_w"], 52)

    @patch('MyCapytain.retrievers.cts5.requests.get', side_effect=mocked_requests_get)
    def test_api_valid_reff_get(self, mock_get: MagicMock):
        """ Retrieves possible citations for a given URN.

        The reference cache folder is cleared after each API request. The last part calls the corpus
        service directly, once with a simulated HTTP error and once without.
        """
        args: dict = dict(urn=Mocks.urn_custom[:-14])
        response: Response = self.client.get(TestingConfig.SERVER_URI_VALID_REFF, query_string=args)
        self.assertEqual(len(json.loads(response.data.decode("utf-8"))), 3)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        args["urn"] = f"{Mocks.urn_custom[:-13]}4"
        response = self.client.get(TestingConfig.SERVER_URI_VALID_REFF, query_string=args)
        self.assertEqual(response.status_code, 404)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        args["urn"] = f"{Mocks.urn_custom[:-13]}abc"
        response = self.client.get(TestingConfig.SERVER_URI_VALID_REFF, query_string=args)
        self.assertEqual(response.status_code, 400)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        TestingConfig.SIMULATE_HTTP_ERROR = True
        try:
            self.assertEqual(len(CorpusService.get_standard_corpus_reff(Mocks.urn[:-8])), 0)
        finally:
            # the switch lives on the shared config class; reset it even if the assertion failed
            TestingConfig.SIMULATE_HTTP_ERROR = False
        reff: List[str] = CorpusService.get_standard_corpus_reff(Mocks.urn[:-8])
        self.assertEqual(len(reff), 7)

    def test_api_vector_network_get(self):
        """ Requests a vector network for a search regex, with edge building and model loading mocked. """
        network_api = mcserver.app.api.vectorNetworkAPI
        with patch.object(network_api, "add_edges", side_effect=Mocks.mock_add_eges), \
                patch.object(network_api.Word2Vec, "load", return_value=MockW2V()):
            params: dict = dict(search_regex='ueritas', nearest_neighbor_count=150, min_count=6)
            response: Response = self.client.get(TestingConfig.SERVER_URI_VECTOR_NETWORK, query_string=params)
            network_svg: str = json.loads(response.data.decode("utf-8"))
            self.assertGreater(len(network_svg), 6500)

    def test_api_vector_network_post(self):
        """ Posts a search regex to the vector network API, with file access and model loading mocked. """
        sentences: str = "This is a sentence.\nAnd here is yet another one.\n"
        with patch("mcserver.app.api.vectorNetworkAPI.open", mock_open(read_data=sentences)), \
                patch.object(mcserver.app.api.vectorNetworkAPI.Word2Vec, "load", return_value=MockW2V()):
            form_data: dict = dict(search_regex='uera', nearest_neighbor_count=10)
            response: Response = self.client.post(TestingConfig.SERVER_URI_VECTOR_NETWORK,
                                                  headers=Mocks.headers_form_data, data=form_data)
            contexts = json.loads(response.data.decode("utf-8"))
            self.assertEqual(len(contexts), 2)

    # patch injects mocks bottom-up, so the decorator order mirrors the argument order (get first, post second)
    @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post)
    @patch('mcserver.app.services.corpusService.requests.get', side_effect=mocked_requests_get)
    def test_api_vocabulary_get(self, mock_get: MagicMock, mock_post: MagicMock):
        """ Calculates lexical overlap between a text (specified by URN) and a static vocabulary.

        The first request asks for out-of-vocabulary marking and inspects the node at index 3, the
        second one asks for the matching degree instead.
        """
        args: dict = dict(query_urn=Mocks.urn_custom, show_oov=True, vocabulary=VocabularyCorpus.agldt.name,
                          frequency_upper_bound=500)
        response: Response = self.client.get(TestingConfig.SERVER_URI_VOCABULARY, query_string=args)
        ar: AnnisResponse = AnnisResponse(json_dict=json.loads(response.data.decode("utf-8")))
        self.assertTrue(NodeMC(json_dict=ar.nodes[3]).is_oov)
        args["show_oov"] = False
        args["frequency_upper_bound"] = 6000
        response = self.client.get(TestingConfig.SERVER_URI_VOCABULARY, query_string=args)
        matching_degree: float = json.loads(response.data.decode("utf-8"))[0]["matching_degree"]
        # floats are compared with a tolerance, so rounding in the last digits cannot fail the test
        self.assertAlmostEqual(matching_degree, 90.9090909090909)

    def test_app_init(self):
        """Initializes the graphANNIS logging and creates both apps (CSM and MC server) in testing mode."""
        CorpusService.init_graphannis_logging()
        graphannis_log: str = os.path.join(os.getcwd(), Config.GRAPHANNIS_LOG_PATH)
        self.assertTrue(os.path.exists(graphannis_log))
        os.remove(graphannis_log)
        with patch.object(sys, 'argv', Mocks.test_args):
            csm_app: Flask = csm.get_app()
            self.assertIsInstance(csm_app, Flask)
            self.assertTrue(csm_app.config["TESTING"])
            UpdateInfo.query.delete()
            mc_app: Flask = mcserver.get_app()
            self.assertIsInstance(mc_app, Flask)
            self.assertTrue(mc_app.config["TESTING"])
        self.app_context.push()
        Corpus.query.delete()

    def test_create_app(self):
        """Creates a new Flask application and configures it. Initializes the application and the database.

        With the migration flag in ``sys.argv`` and ``init_app_common`` mocked, ``create_app`` is expected
        to set the database URI of the given configuration to ``Config.DATABASE_URL_LOCAL``.
        """
        with patch.object(sys, "argv", [None, None, Config.FLASK_MIGRATE]):
            with patch.object(mcserver.app, "init_app_common", return_value=Flask(__name__)):
                cfg: Type[Config] = TestingConfig
                old_uri: str = cfg.SQLALCHEMY_DATABASE_URI
                try:
                    create_app(cfg)
                    self.assertEqual(cfg.SQLALCHEMY_DATABASE_URI, Config.DATABASE_URL_LOCAL)
                finally:
                    # the URI is stored on the shared config class; restore it even if the assertion failed
                    cfg.SQLALCHEMY_DATABASE_URI = old_uri
        self.app_context.push()

    def test_init_corpus_storage_manager(self):
        """ Initializes the corpus storage manager.

        Runs the CSM app in a separate process, checks that it is alive and terminates it again.
        """
        ui_cts: UpdateInfo = UpdateInfo(resource_type=ResourceType.cts_data, last_modified_time=datetime.utcnow())
        db.session.add(ui_cts)
        db.session.commit()
        with patch.object(sys, 'argv', Mocks.test_args):
            os.environ[Config.COVERAGE_ENVIRONMENT_VARIABLE] = Config.COVERAGE_CONFIGURATION_FILE_NAME
            csm_process: Process = Process(target=csm.run_app)
        # NOTE(review): the sys.argv patch has already ended when the process is started - confirm
        # whether the child process is meant to see Mocks.test_args
        csm_process.start()
        try:
            self.app_context.push()
            self.assertTrue(csm_process.is_alive())
        finally:
            # never leave the child process running, even if the assertion above failed
            csm_process.terminate()
            csm_process.join()
        self.assertFalse(csm_process.is_alive())
        UpdateInfo.query.delete()

    @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post)
    def test_map_exercise_data_to_database(self, mock_post: MagicMock):
        """Maps the mock exercise data to the database and compares the result to the mock exercise."""
        db.session.add(UpdateInfo(resource_type=ResourceType.exercise_list,
                                  last_modified_time=datetime.fromtimestamp(1)))
        db.session.commit()
        template: Exercise = Mocks.exercise
        mapped: Exercise = map_exercise_data_to_database(
            exercise_data=Mocks.exercise_data,
            solutions=[Solution(json_dict=x) for x in json.loads(template.solutions)],
            urn=Mocks.urn_custom,
            xml_guid=template.eid,
            conll=template.conll,
            instructions=template.instructions,
            exercise_type=template.exercise_type,
            exercise_type_translation=template.exercise_type_translation,
            search_values=template.search_values,
            correct_feedback=template.correct_feedback,
            partially_correct_feedback=template.partially_correct_feedback,
            incorrect_feedback=template.incorrect_feedback,
            general_feedback=template.general_feedback,
            work_author="",
            work_title="")
        # attributes that have to be identical in the template and in the mapped exercise
        compared_attributes: Tuple[str, ...] = (
            "conll", "general_feedback", "incorrect_feedback", "search_values", "partially_correct_feedback",
            "correct_feedback", "instructions", "exercise_type_translation", "exercise_type", "solutions",
            "uri", "eid")
        expected_values: List[str] = [getattr(template, name) for name in compared_attributes]
        actual_values: List[str] = [getattr(mapped, name) for name in compared_attributes]
        self.assertEqual(expected_values, actual_values)
        # exactly one exercise has to be stored, and it has to equal the mapped one
        stored: Exercise = Exercise.query.one()
        self.assertEqual(mapped, stored)
        Exercise.query.delete()
        session.make_transient(Mocks.exercise)

    @patch('MyCapytain.retrievers.cts5.requests.get', side_effect=mocked_requests_get)
    def test_update_corpora(self, mock_get: MagicMock):
        """Updates the corpora from the mocked CTS API and checks that an emptied title gets restored."""
        CorpusService.update_corpora()
        self.assertEqual(len(CorpusService.existing_corpora), 1)
        existing: Corpus = CorpusService.existing_corpora[0]
        # blank the title, so the second update below has something to repair
        existing.title = ""
        db.session.commit()
        McTestCase.add_corpus(Corpus(source_urn="123"))
        citation_levels: List[CitationLevel] = [
            existing.citation_level_1, existing.citation_level_2, existing.citation_level_3]
        # updating the corpus with its own values has to leave the title empty
        CorpusService.update_corpus(existing.title, existing.source_urn, existing.author, citation_levels, existing)
        self.assertFalse(existing.title)
        CorpusService.update_corpora()
        self.assertTrue(existing.title)
        Corpus.query.delete()


class CsmTestCase(unittest.TestCase):
    """The test suite for the Corpus Storage Manager application."""

    @staticmethod
    def set_up_mcserver() -> FlaskClient:
        """Creates an mcserver app from the testing configuration and returns a test client for it."""
        mc_app: Flask = create_app(TestingConfig)
        # only let critical messages through while the tests are running
        mc_app.logger.setLevel(logging.CRITICAL)
        mc_app.testing = True
        mc_client: FlaskClient = mc_app.test_client()
        return mc_client

    def mocked_requests_post(*args, **kwargs):
        """Replacement for ``requests.post`` that picks a canned response based on the URL.

        The URL is taken from the first positional argument:
        - URLs ending with the text complexity URI get the mocked text complexity JSON,
        - any other URL ending with a slash gets a payload that is not valid JSON,
        - everything else gets the serialized mock ANNIS response.
        """
        target_url: str = args[0]
        if target_url.endswith(Config.SERVER_URI_TEXT_COMPLEXITY):
            return MockResponse(Mocks.text_complexity_json_string)
        if target_url[-1] == '/':
            return MockResponse("}{")
        return MockResponse(json.dumps(Mocks.annis_response_dict))

    def setUp(self):
        """Creates a fresh CSM app, app context and test client, and empties the tables used by the tests."""
        self.start_time = time.time()
        # start every test without a graph database directory left over from earlier runs
        graph_dir = Config.GRAPH_DATABASE_DIR
        if os.path.exists(graph_dir):
            shutil.rmtree(graph_dir)
        # TextService.init_stop_words_latin stays patched until the cleanup stops the patcher after the test
        stop_words_patcher = patch.object(TextService, "init_stop_words_latin")
        self.addCleanup(stop_words_patcher.stop)
        stop_words_patcher.start()
        csm_app: Flask = create_csm_app(TestingConfig)
        csm_app.logger.setLevel(logging.CRITICAL)
        self.app = csm_app
        self.app_context = csm_app.app_context()
        self.app_context.push()
        csm_app.testing = True
        self.client = csm_app.test_client()
        for model in (UpdateInfo, Corpus):
            model.query.delete()

    def tearDown(self):
        """Exits the corpus storage manager, drops all tables, pops the app context and prints the test duration."""
        Config.CORPUS_STORAGE_MANAGER.__exit__(None, None, None)
        db.session.remove()
        db.drop_all()
        self.app_context.pop()
        test_id: str = self.id()
        elapsed: float = time.time() - self.start_time
        print(f"{test_id}: {elapsed:.2f} seconds")

    def test_api_annis_find(self):
        """Runs several AQL queries for the custom mock corpus and checks the number of results for each."""
        corpus_urn: str = Mocks.urn_custom
        file_name: str = AnnotationService.get_disk_urn(corpus_urn)
        # build the graph for the mock annotations first, so that the queries below have data to search in
        AnnotationService.map_conll_to_graph(
            corpus_name=corpus_urn, conll=Mocks.annotations, cs=Config.CORPUS_STORAGE_MANAGER, file_name=file_name)
        find_response: Response = self.client.get(
            Config.SERVER_URI_ANNIS_FIND, query_string={"urn": corpus_urn, "aql": "tok"})
        matches: List[str] = json.loads(find_response.get_data())
        self.assertEqual(len(matches), 6)
        # (AQL query, phenomena passed along with it, expected number of solutions)
        search_cases: List[Tuple[str, List[Phenomenon], int]] = [
            ('tok ->dep tok', [Phenomenon.dependency], 5),
            ('upostag="VERB" ->dep tok', [Phenomenon.partOfSpeech, Phenomenon.dependency], 5),
            ('tok ->dep tok ->dep tok', [Phenomenon.dependency, Phenomenon.partOfSpeech], 3),
        ]
        for aql, phenomena, expected_count in search_cases:
            solutions: List[Solution] = CorpusService.get_matches(corpus_urn, [aql], phenomena)
            self.assertEqual(len(solutions), expected_count)

    def test_api_csm_get(self):
        """Gets the graph data for a URN from the CSM and checks the text rebuilt from its tokens.

        ``CorpusService.get_corpus`` is patched to return an empty ``AnnisResponse`` on the first call and the
        mock ANNIS response on the second one. The first request therefore has to be answered with 404, while
        the second one has to deliver nodes whose tokens contain the expected sentence.
        """
        ret_vals: List[AnnisResponse] = [AnnisResponse(), Mocks.annis_response]
        with patch.object(CorpusService, "get_corpus", side_effect=ret_vals):
            response: Response = self.client.get(TestingConfig.SERVER_URI_CSM, query_string=dict(urn=Mocks.urn[:5]))
            self.assertEqual(response.status_code, 404)
            response = self.client.get(TestingConfig.SERVER_URI_CSM, query_string=dict(urn=Mocks.urn_custom))
            graph_data_raw: dict = json.loads(response.get_data().decode("utf-8"))
            graph_data: GraphData = GraphData(json_dict=graph_data_raw)
            text_raw: str = " ".join(x.annis_tok for x in graph_data.nodes)
            # Remove the space in front of punctuation, so that the joined tokens read like running text.
            # The punctuation characters are escaped because some of them (e.g. the closing bracket, the
            # backslash, the caret and the hyphen) have a special meaning inside a regex character class.
            punctuation_pattern: str = "[ ]([{0}])".format(re.escape(string.punctuation))
            received_text: str = re.sub(punctuation_pattern, r"\1", text_raw)
            expected_text: str = "Pars est prima prudentiae ipsam cui praecepturus es aestimare personam."
            self.assertIn(expected_text, received_text)

    @patch('mcserver.app.services.corpusService.requests.post', side_effect=mocked_requests_post)
    @patch('mcserver.app.services.textComplexityService.requests.post', side_effect=mocked_requests_post)
    def test_api_exercise_post(self, mock_post_cs: MagicMock, mock_post_tcs: MagicMock):
        """Creates a new exercise from scratch and then provokes a server error with a broken CSM port.

        ``requests.post`` is replaced by ``mocked_requests_post`` in both patched service modules. Note that
        ``patch`` decorators hand over their mocks bottom-up, i.e. the first mock parameter belongs to the
        decorator that is closest to the function. Neither mock is inspected by this test.
        """
        ui_exercises: UpdateInfo = UpdateInfo(resource_type=ResourceType.exercise_list,
                                              last_modified_time=datetime.fromtimestamp(1))
        db.session.add(ui_exercises)
        db.session.commit()
        client: FlaskClient = CsmTestCase.set_up_mcserver()
        data_dict: dict = dict(urn=Mocks.exercise.urn, type=ExerciseType.matching.value,
                               search_values=Mocks.exercise.search_values)
        response: Response = client.post(Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=data_dict)
        ar: AnnisResponse = AnnisResponse(json_dict=json.loads(response.data.decode("utf-8")))
        self.assertEqual(len(ar.solutions), 3)
        # Append a slash to the port; mocked_requests_post answers URLs ending with a slash with invalid JSON.
        # NOTE(review): presumably the port ends up at the end of the request URL - confirm in the service.
        original_port = Config.CORPUS_STORAGE_MANAGER_PORT
        Config.CORPUS_STORAGE_MANAGER_PORT = f"{original_port}/"
        try:
            response = client.post(Config.SERVER_URI_EXERCISE, headers=Mocks.headers_form_data, data=data_dict)
            self.assertEqual(response.status_code, 500)
        finally:
            # Config is global: restore the exact original value even if the request or the assertion fails,
            # otherwise every following test would inherit the broken port
            Config.CORPUS_STORAGE_MANAGER_PORT = original_port
        # NOTE(review): pushes the CSM app context again, presumably so that tearDown can pop it - confirm
        self.app_context.push()

    def test_api_frequency_get(self):
        """Requests a frequency analysis for a URN while the underlying service call returns a fixed analysis."""
        dependency_item: FrequencyItem = FrequencyItem(
            values=[Dependency.object.name], phenomena=[Phenomenon.dependency], count=1)
        pos_item: FrequencyItem = FrequencyItem(
            values=[PartOfSpeech.adjective.name], phenomena=[Phenomenon.partOfSpeech], count=1)
        expected_fa: FrequencyAnalysis = FrequencyAnalysis()
        for item in (dependency_item, pos_item):
            expected_fa.append(item)
        with patch.object(CorpusService, "get_frequency_analysis", return_value=expected_fa):
            response: Response = self.client.get(
                TestingConfig.SERVER_URI_FREQUENCY, query_string={"urn": Mocks.urn_custom})
            body: str = response.data.decode("utf-8")
            fa: FrequencyAnalysis = FrequencyAnalysis(json_list=json.loads(body))
            # the first item has to keep its values, the first value of the second item has to come back as None
            self.assertEqual(fa[0].values, expected_fa[0].values)
            self.assertEqual(fa[1].values[0], None)

    def test_api_subgraph_get(self):
        """Requests the subgraph for a single token without context and compares it to the expected JSON."""
        query: dict = {"urn": Mocks.urn_custom, "aqls": ['tok="quarum"'], "ctx_left": "0", "ctx_right": "0"}
        subgraph_response: Response = self.client.get(TestingConfig.SERVER_URI_CSM_SUBGRAPH, query_string=query)
        body: str = subgraph_response.data.decode("utf-8")
        self.assertEqual(body, Mocks.subgraph_json)

    def test_api_subgraph_post(self):
        """Posts a subgraph request with a context of five tokens on both sides and checks the size of the result."""
        payload: dict = {"urn": Mocks.urn_custom, "aqls": ['tok="quarum"'], "ctx_left": "5", "ctx_right": "5"}
        subgraph_response: Response = self.client.post(TestingConfig.SERVER_URI_CSM_SUBGRAPH, data=json.dumps(payload))
        raw_results: list = json.loads(subgraph_response.data.decode("utf-8"))
        exercise_data_list: List[ExerciseData] = [ExerciseData(json_dict=entry) for entry in raw_results]
        first_graph = exercise_data_list[0].graph
        self.assertEqual(len(first_graph.nodes), 10)
        # calling the single subgraph helper with these arguments has to raise NotImplementedError
        with self.assertRaises(NotImplementedError):
            AnnotationService.get_single_subgraph("", [])

    def test_api_text_complexity_get(self):
        """Posts text complexity requests for a URN, first for all measures and then for a single one."""
        uri: str = TestingConfig.SERVER_URI_TEXT_COMPLEXITY
        payload: dict = {"urn": Mocks.urn_custom, "measure": TextComplexityMeasure.all.name}
        all_response: Response = self.client.post(uri, data=json.dumps(payload))
        tc_all: TextComplexity = TextComplexity(json_dict=json.loads(all_response.data.decode("utf-8")))
        self.assertEqual(tc_all.pos, 8)
        # repeat the request for one specific measure
        payload["measure"] = "n_w"
        single_response: Response = self.client.post(uri, data=json.dumps(payload))
        tc_single: TextComplexity = TextComplexity(json_dict=json.loads(single_response.data.decode("utf-8")))
        self.assertEqual(tc_single.n_w, 14)

    @patch('mcserver.app.services.corpusService.CorpusService.update_corpora')
    def test_check_corpus_list_age(self, mock_update: MagicMock):
        """Checks that a very old CTS update timestamp is replaced by a current one during the age check."""
        stale_info: UpdateInfo = UpdateInfo(
            resource_type=ResourceType.cts_data, last_modified_time=datetime.fromtimestamp(1))
        db.session.add(stale_info)
        db.session.commit()
        # reference point: the stored timestamp has to be later than this after the check
        before_check: datetime = datetime.utcnow()
        DatabaseService.check_corpus_list_age(self.app)
        cts_query = UpdateInfo.query.filter_by(resource_type=ResourceType.cts_data.name)
        refreshed_info: UpdateInfo = cts_query.first()
        self.assertGreater(refreshed_info.last_modified_time, before_check)
        UpdateInfo.query.delete()

    def test_corpus_storage_manager(self):
        """Sends a GET, a JSON POST and a non-JSON POST to the CSM endpoint and checks the two POST responses."""
        csm_uri: str = TestingConfig.SERVER_URI_CSM
        self.client.get(csm_uri, query_string={"urn": Mocks.urn_custom})
        payload: dict = {
            "title": Mocks.exercise.urn,
            "annotations": Mocks.exercise.conll,
            "aqls": Mocks.aqls,
            "exercise_type": ExerciseType.cloze.name,
            "search_phenomena": [Phenomenon.partOfSpeech.name],
        }
        json_response: Response = self.client.post(csm_uri, data=json.dumps(payload))
        # ANNIS does not create deterministically reproducible results, so only a substring is compared
        self.assertIn(Mocks.graph_data_raw_part, json_response.data.decode("utf-8"))
        # the same payload passed as a dict instead of a JSON string has to be rejected as a bad request
        rejected_response: Response = self.client.post(csm_uri, data=payload)
        self.assertEqual(rejected_response.status_code, 400)

    def test_full_init(self):
        """Runs the full initialization with testing mode switched off and expects the updater to start once."""
        config = self.app.config
        config["TESTING"] = False
        # neither the graphannis logging nor a real updater should be set up by this test
        with patch.object(CorpusService, "init_graphannis_logging"), \
                patch.object(mcserver.app, "start_updater") as updater_mock:
            full_init(self.app, True)
            self.assertEqual(updater_mock.call_count, 1)
        config["TESTING"] = True

    def test_get_annotations_from_string(self):
        """Gets annotations for a standard URN, a custom URN and a custom corpus URN with a citation range."""
        conll: List[TokenList]
        with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string), \
                patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list):
            # the raw text is only mocked while the annotations are requested, not for the graph lookups below
            with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text):
                conll = CorpusService.get_annotations_from_string(Mocks.urn)
                self.assertEqual(len(conll[0]), 4)
            for graph_urn in (Mocks.urn, f"{Mocks.urn}@1-1"):
                mdg: MultiDiGraph = CorpusService.get_graph(graph_urn)
                self.assertEqual(len(mdg.nodes), 7)
        with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
            conll = CorpusService.get_annotations_from_string(Mocks.urn_custom)
            self.assertEqual(len(conll[0]), 6)
        doubled_annotations = Mocks.annotations * 2
        with patch.object(CustomCorpusService, "get_custom_corpus_annotations", return_value=doubled_annotations):
            viva_urn: str = f"{Config.CUSTOM_CORPUS_VIVA_URN}:1.1-1.1"
            conll = CorpusService.get_annotations_from_string(viva_urn)
            self.assertEqual(len(conll), 2)

    def test_get_frequency_analysis(self):
        """Gets a frequency analysis, once via a mocked HTTP response and once with ``is_csm`` set to True."""
        # NOTE(review): count is an empty list here although other tests pass an integer - confirm this is intended
        serialized_item = FrequencyItem(values=[], phenomena=[], count=[]).serialize()
        http_response = MockResponse(json.dumps([serialized_item]))
        with patch.object(mcserver.app.services.corpusService.requests, "get", return_value=http_response):
            fa: FrequencyAnalysis = CorpusService.get_frequency_analysis(urn=Mocks.urn_custom, is_csm=False)
            self.assertEqual(len(fa), 1)
        CorpusService.get_corpus(Mocks.urn_custom, True)
        with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
            fa = CorpusService.get_frequency_analysis(Mocks.urn_custom, True)
            self.assertEqual(len(fa), 242)

    def test_get_graph(self):
        """Checks that the graph returned by the corpus storage manager's ``subcorpus_graph`` is passed through."""
        edges = [(1, 2), (2, 3), (3, 4)]
        expected_mdg: MultiDiGraph = MultiDiGraph(edges)
        storage_manager = Config.CORPUS_STORAGE_MANAGER
        with patch.object(storage_manager, "subcorpus_graph", return_value=expected_mdg):
            actual_mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn)
            self.assertEqual(actual_mdg, expected_mdg)

    def test_init_updater(self):
        """Starts the corpus list updater while the age check raises an ``OperationalError``.

        The updater has to come back as a running thread, and the failing age check must not lead to a call
        of ``CorpusService.update_corpora``.
        """
        with patch.object(DatabaseService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")):
            ui_cts: UpdateInfo = UpdateInfo(resource_type=ResourceType.cts_data,
                                            last_modified_time=datetime.fromtimestamp(1))
            db.session.add(ui_cts)
            db.session.commit()
            with patch.object(CorpusService, 'update_corpora') as update_mock:
                t: Thread = start_updater(self.app)
                self.assertIsInstance(t, Thread)
                self.assertTrue(t.is_alive())
                # give the background thread a moment to run before the mock is inspected
                time.sleep(0.1)
                UpdateInfo.query.delete()
                # unittest assertion instead of a bare assert, which is removed when Python runs with -O
                self.assertFalse(update_mock.called)

    def test_process_corpus_data(self):
        """Processes annotated text data into graph data and checks the node count and the loaded text list."""
        corpus_urn: str = Mocks.urn_custom
        file_name: str = AnnotationService.get_disk_urn(corpus_urn)
        AnnotationService.map_conll_to_graph(
            corpus_name=corpus_urn, conll=Mocks.annotations, cs=Config.CORPUS_STORAGE_MANAGER, file_name=file_name)
        processed: dict = CorpusService.process_corpus_data(
            urn=corpus_urn, annotations=Mocks.annotations, aqls=["upostag"], exercise_type=ExerciseType.cloze,
            search_phenomena=[Phenomenon.partOfSpeech])
        graph_data: GraphData = AnnotationService.map_graph_data(processed["graph_data_raw"])
        # the graph data has to contain as many nodes as the mock node list
        self.assertEqual(len(graph_data.nodes), len(Mocks.nodes))
        text_parts: List[Tuple[str, str]] = CorpusService.load_text_list(corpus_urn)
        self.assertEqual(len(text_parts), 1)

    def test_run_app(self):
        """Runs the CSM entry point with ``get_app`` mocked and expects exactly one call to it."""
        with patch.object(csm, "get_app") as get_app_mock:
            csm.run_app()
            number_of_calls: int = get_app_mock.call_count
            self.assertEqual(number_of_calls, 1)


class CommonTestCase(unittest.TestCase):
    def setUp(self):
        """Removes an existing graph database directory, creates the mcserver app and pushes its app context."""
        self.start_time = time.time()
        graph_dir = Config.GRAPH_DATABASE_DIR
        if os.path.exists(graph_dir):
            shutil.rmtree(graph_dir)
        # TextService.init_stop_words_latin is only mocked while the app is being created
        stop_words_patch = patch.object(TextService, "init_stop_words_latin")
        with stop_words_patch:
            self.app = create_app(TestingConfig)
        self.app_context = self.app.app_context()
        self.app_context.push()

    def tearDown(self):
        """Removes the session, drops all tables, pops the app context and prints the test duration."""
        db.session.remove()
        db.drop_all()
        self.app_context.pop()
        test_id: str = self.id()
        elapsed: float = time.time() - self.start_time
        print(f"{test_id}: {elapsed:.2f} seconds")

    def test_add_edges(self):
        """Trains a small Word2Vec model on the mock text and checks that edges are added to an empty graph."""
        # one token list per sentence of the mock text
        sentences: List[List[str]] = [sentence.split() for sentence in Mocks.raw_text.split(". ")]
        model: Word2Vec = Word2Vec(sentences, min_count=1, sample=0)
        network: Graph = Graph()
        add_edges(["fortis"], model, 2, 1, network)
        self.assertGreater(len(network.edges), 1)

    def test_add_urn_to_sentences(self):
        """Adds URNs to two annotated sentences and checks the URN metadata of both afterwards."""
        # work on a copy, so that the shared mock annotations are not modified
        conll: List[TokenList] = copy.deepcopy(Mocks.annotations)
        text_list: List[Tuple[str, str]] = [(Mocks.urn, conll[0].tokens[0]["form"]), (Mocks.urn_custom, "")]
        conll[0].tokens[0]["form"] += "."
        extra_token: dict = {
            "id": 1, "form": "Caesar.", "lemma": "Caeso", "upostag": "VERB", "xpostag": "L3|modJ|tem3|gen4|stAV",
            "feats": {"Mood": "Ind", "Number": "Sing", "Person": "1", "Tense": "Fut", "VerbForm": "Fin",
                      "Voice": "Pass"},
            "head": 0, "deprel": "root", "deps": None, "misc": {"ref": "1.1"}}
        extra_metadata: OrderedDict = OrderedDict([("sent_id", "2"), ("urn", "")])
        conll.append(TokenList(tokens=[extra_token], metadata=extra_metadata))
        AnnotationService.add_urn_to_sentences(text_list, conll)
        first_sentence, second_sentence = conll[0], conll[1]
        self.assertEqual(first_sentence.metadata["urn"], Mocks.urn)
        self.assertEqual(second_sentence.metadata["urn"], "")

    def test_create_xml_string(self):
        """Creates the XML string for an empty matching exercise and compares it to the expected mock XML.

        For the Moodle XML format, see https://docs.moodle.org/35/en/Moodle_XML_format .
        """
        matching_exercise: Exercise = Exercise(exercise_type=ExerciseType.matching.value)
        actual_xml: str = XMLservice.create_xml_string(matching_exercise, [], FileType.pdf, [])
        self.assertEqual(actual_xml, Mocks.exercise_xml)

    def test_extract_custom_corpus_text(self):
        """Extracts text from the mock text parts with three different argument combinations."""
        parts = Mocks.text_parts
        extracted: List[Tuple[str, str]] = CustomCorpusService.extract_custom_corpus_text(
            parts, ["", ""], ["", "0"], "", 1, [False, True])
        self.assertEqual(len(extracted), 0)
        # same call without the trailing list of flags
        extracted = CustomCorpusService.extract_custom_corpus_text(parts, ["", ""], ["", "0"], "", 1)
        self.assertEqual(extracted[0][1], parts[0].text_value)
        # single-element start and end values, without a level argument
        extracted = CustomCorpusService.extract_custom_corpus_text(parts, ["1"], ["3"], "")
        self.assertEqual(extracted[0][1], parts[0].text_value)

    def test_find_matches(self):
        """Checks that the matches delivered by the mocked HTTP response are returned unchanged."""
        expected_matches: List[str] = ["a", "b"]
        http_response = MockResponse(json.dumps(expected_matches))
        with patch.object(mcserver.app.services.corpusService.requests, "get", return_value=http_response):
            actual_matches: List[str] = CorpusService.find_matches(Mocks.urn, "")
            self.assertEqual(actual_matches, expected_matches)

    def test_get_concept_network(self):
        """Builds a concept network with a mocked model and mocked edge creation and checks the result size."""
        vector_api = mcserver.app.api.vectorNetworkAPI
        with patch.object(vector_api, "add_edges", side_effect=Mocks.mock_add_eges), \
                patch.object(vector_api.Word2Vec, "load", return_value=MockW2V()):
            svg_string: str = get_concept_network("ueritas", highlight_regex_string="uera")
            # only the length of the returned string is checked
            self.assertGreater(len(svg_string), 6500)

    def test_get_corpus(self):
        """Requests the corpus for an empty URN and expects a response without any nodes."""
        empty_response: AnnisResponse = CorpusService.get_corpus("", True)
        node_count: int = len(empty_response.nodes)
        self.assertEqual(node_count, 0)

    def test_get_custom_corpus_annotations(self):
        """Gets the annotations for a custom corpus URN with a range while text, UdPipe and parsing are mocked."""
        extra_sentence: TokenList = TokenList([], metadata=OrderedDict([("sent_id", "3")]))
        mock_conll: List[TokenList] = Mocks.annotations + [extra_sentence]
        with patch.object(CustomCorpusService, "get_custom_corpus_text", return_value=Mocks.text_list), \
                patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string), \
                patch.object(AnnotationService, "parse_conll_string", return_value=mock_conll):
            conll: List[TokenList] = CustomCorpusService.get_custom_corpus_annotations(Mocks.urn + "@1-2")
            self.assertEqual(len(conll), 1)

    def test_get_custom_corpus_reff(self):
        """Gets the references for differently truncated versions of the custom URN and checks their number."""
        CustomCorpusService.custom_corpora[4].text_parts = Mocks.text_parts
        full_urn: str = Mocks.urn_custom
        reff: List[str] = CustomCorpusService.get_custom_corpus_reff(full_urn[:-15])
        self.assertEqual(len(reff), 0)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        reff = CustomCorpusService.get_custom_corpus_reff(full_urn[:-14])
        self.assertEqual(len(reff), 1)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        # the same URN is requested twice without clearing the cache folder in between
        for _ in range(2):
            reff = CustomCorpusService.get_custom_corpus_reff(full_urn[:-9])
            self.assertEqual(len(reff), 2)
        McTestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)

    def test_get_custom_corpus_text(self):
        """Requests the custom corpus text for the standard mock URN and expects an empty list."""
        text_parts: List[Tuple[str, str]] = CustomCorpusService.get_custom_corpus_text(Mocks.urn)
        number_of_parts: int = len(text_parts)
        self.assertEqual(number_of_parts, 0)

    def test_get_pdf_html_string(self):
        """Builds the HTML string for the mock exercise, once as a matching and once as a mark words exercise.

        The exercise type of the shared ``Mocks.exercise`` object is changed for this purpose and set to the
        cloze type again at the end, no matter whether the assertions pass.
        """
        try:
            Mocks.exercise.exercise_type = ExerciseType.matching.value
            solutions: List[Solution] = [Solution(json_dict=x) for x in json.loads(Mocks.exercise.solutions)]
            result: str = FileService.get_pdf_html_string(Mocks.exercise, Mocks.annotations, FileType.pdf, solutions)
            self.assertEqual(result, '<br><p>: </p><p><table><tr><td>praecepturus</td><td>Caesar</td></tr></table></p>')
            Mocks.exercise.exercise_type = ExerciseType.markWords.value
            result = FileService.get_pdf_html_string(Mocks.exercise, Mocks.annotations, FileType.pdf, solutions)
            self.assertEqual(result, '<p>: </p><p>Caesar et Galli fortes sunt.</p><br><br>')
        finally:
            # Mocks.exercise is used by other tests as well, so the type is reset even if an assertion fails
            Mocks.exercise.exercise_type = ExerciseType.cloze.value

    def test_get_raw_text(self):
        """Gets the raw text for a URN while the corpus lookup returns the mock ANNIS response."""
        with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
            raw_text: str = CorpusService.get_raw_text(Mocks.urn, True)
            text_length: int = len(raw_text)
            self.assertEqual(text_length, 349)

    def test_get_solutions_by_index(self):
        """Gets the solutions of the mock exercise by index and expects exactly one of them."""
        selected: List[Solution] = TextService.get_solutions_by_index(Mocks.exercise)
        number_of_solutions: int = len(selected)
        self.assertEqual(number_of_solutions, 1)

    def test_get_treebank_annotations(self):
        """Gets treebank annotations twice, the second time while ``json.loads`` raises a ``ValueError``.

        The CoNLL-U parser is mocked and has to be called once for each of the two requests.
        """
        corpus_file_name: str = ntpath.basename(CustomCorpusService.custom_corpora[4].file_path)
        cache_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, corpus_file_name + ".json")
        # make sure that no cache file from an earlier run exists
        if os.path.exists(cache_path):
            os.remove(cache_path)
        with patch.object(mcserver.app.services.customCorpusService.conllu, "parse",
                          return_value=Mocks.annotations) as parse_mock, \
                patch.object(CustomCorpusService, "get_treebank_sub_annotations", return_value=Mocks.annotations):
            conll: List[TokenList] = CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
            self.assertIs(conll, Mocks.annotations)
            with patch.object(mcserver.app.services.customCorpusService.json, "loads", side_effect=ValueError):
                CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
                # remove the cache file before the assertion, so that it is gone even if the assertion fails
                os.remove(cache_path)
                self.assertEqual(parse_mock.call_count, 2)

    def test_get_treebank_sub_annotations(self):
        """Gets sub-annotations for a URN with a range and checks the text parts of the last custom corpus."""
        extra_sentences: List[TokenList] = [
            TokenList([], metadata=OrderedDict([("sent_id", sent_id)])) for sent_id in ("2", "3")]
        annotations: List[TokenList] = Mocks.annotations + extra_sentences
        conll: List[TokenList] = CustomCorpusService.get_treebank_sub_annotations(
            Mocks.urn + "@1-3", annotations, CustomCorpusService.custom_corpora[4])
        self.assertEqual(len(conll), 3)
        last_corpus: CustomCorpus = CustomCorpusService.custom_corpora[-1]
        range_urn: str = last_corpus.corpus.source_urn + ":1.1-1.2"
        # the return value is not needed here, only the text parts of the corpus are inspected afterwards
        CustomCorpusService.get_treebank_sub_annotations(range_urn, [], last_corpus)
        self.assertEqual(len(last_corpus.text_parts), 2)

    def test_get_udpipe(self):
        """Annotates a short text with UdPipe and looks for the expected annotations in the result.

        Only a substring is compared because the beginning of the CoNLL output contains the randomly generated
        path of a temporary file and thus cannot be predicted exactly.
        """
        sample_text: str = "Caesar fortis est. Galli moriuntur."
        annotated: str = AnnotationService.get_udpipe(sample_text)
        self.assertIn(Mocks.udpipe_string, annotated)

    def test_init_custom_corpus(self):
        """Initializes the first custom corpus with mocked treebank annotations and expects one text part."""
        first_corpus: CustomCorpus = CustomCorpusService.custom_corpora[0]
        with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
            initialized: CustomCorpus = CustomCorpusService.init_custom_corpus(first_corpus)
            self.assertEqual(len(initialized.text_parts), 1)

    def test_init_db_alembic(self):
        """Drops the alembic table and checks that the database initialization creates it again.

        In Docker, the alembic version is not initially written to the database, so it needs to be set manually.
        """
        alembic_table: str = Config.DATABASE_TABLE_ALEMBIC
        engine = db.engine
        engine.execute(f"DROP TABLE {alembic_table}")
        self.assertEqual(engine.dialect.has_table(engine, alembic_table), False)
        DatabaseService.init_db_alembic()
        self.assertEqual(engine.dialect.has_table(engine, alembic_table), True)

    def test_init_db_corpus(self):
        """Stores a mock corpus under a custom corpus URN and checks its title after initializing the table."""
        custom: CustomCorpus = CustomCorpusService.custom_corpora[0]
        # give the stored mock corpus the same source URN as the custom corpus
        stale_corpus: Corpus = Mocks.corpora[0]
        stale_corpus.source_urn = custom.corpus.source_urn
        McTestCase.add_corpus(stale_corpus)
        del stale_corpus
        DatabaseService.init_db_corpus()
        urn_query = Corpus.query.filter_by(source_urn=custom.corpus.source_urn)
        stored_corpus: Corpus = urn_query.first()
        self.assertEqual(stored_corpus.title, custom.corpus.title)
        Corpus.query.delete()

    def test_init_stop_words_latin(self):
        """Initializes the stop words list for Latin texts and caches it if necessary."""