Newer
Older
"annis::component_name": link_expected.annis_component_name,
"annis::component_type": link_expected.annis_component_type, "udep::deprel": link_expected.udep_deprel}
graph_data_raw: dict = dict(directed=ed_expected.graph.directed, graph=ed_expected.graph.graph,
multigraph=ed_expected.graph.multigraph, links=[link], nodes=[node])
gd: GraphData = AnnotationService.map_graph_data(graph=Graph(), graph_data_raw=graph_data_raw)
self.assertEqual(gd.graph, ed_expected.graph.graph)
self.assertEqual(gd.multigraph, ed_expected.graph.multigraph)
self.assertEqual(gd.directed, ed_expected.graph.directed)
self.assertEqual(gd.nodes[0], ed_expected.graph.nodes[0])
self.assertEqual(gd.links[0], ed_expected.graph.links[0])
graph_data_raw["nodes"] = []
gd = AnnotationService.map_graph_data(graph=Graph(), graph_data_raw=graph_data_raw)
self.assertEqual(len(gd.links), 0)
def test_models(self):
    """ Tests various models and their specific methods.

    Covers the equality operators of corpora, graph links and graph nodes, the string
    representations of Exercise and UpdateInfo, and the (de-)serialization of Choice,
    XapiStatement and ExerciseData.
    """
    # `==` is spelled out on purpose: assertNotEqual would evaluate `!=` instead
    self.assertFalse(Mocks.corpora[0] == Mocks.corpora[1])
    self.assertFalse(Mocks.corpora[0] == "")
    self.assertTrue(repr(Mocks.exercise).startswith("<Exercise"))
    ui: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, created_time=1.0,
                                          last_modified_time=1.0)
    self.assertTrue(repr(ui).startswith("<UpdateInfo"))
    del ui
    self.assertFalse(Mocks.graph_data.links[0] == Mocks.graph_data.links[1])
    self.assertTrue(Mocks.graph_data.links[0] == Mocks.graph_data.links[0])
    self.assertFalse(Mocks.graph_data.nodes[0] == Mocks.graph_data.nodes[1])
    self.assertTrue(Mocks.graph_data.nodes[0] == Mocks.graph_data.nodes[0])
    # a Choice built from a dictionary has to serialize back to an equal dictionary
    choice_dict: dict = dict(id="", description={"en-US": "desc"})
    self.assertEqual(Choice(choice_dict).serialize(), choice_dict)
    xapi: XapiStatement = XapiStatement(json.loads(Mocks.xapi_json_string)["0"])
    self.assertEqual(len(xapi.serialize().keys()), 5)
    # round trip: serialized exercise data must yield the same number of nodes when parsed again
    ed: ExerciseData = ExerciseData(json_dict=Mocks.exercise_data.serialize())
    self.assertEqual(len(ed.graph.nodes), len(Mocks.exercise_data.graph.nodes))
    db.session.query(UpdateInfo).delete()
    session.make_transient(Mocks.corpora[0])
    session.make_transient(Mocks.exercise)
def test_query(self) -> None:
    """Executes a query on the database and rolls back the session if errors occur.

    The ``db`` object of the database service module is replaced by a mock whose
    ``session.query`` raises an ``InvalidRequestError``. ``DatabaseService.query`` must
    then return ``None``; a propagated error would fail the test.
    """
    with patch.object(mcserver.app.services.databaseService, "db") as db_mock:
        # an exception instance used as side_effect is raised whenever the mock gets called
        db_mock.session.query.side_effect = InvalidRequestError()
        self.assertIsNone(DatabaseService.query(Corpus))
def test_remove_older_versions(self):
    """ Two records sharing a title but differing in version are reduced, in place, to the one with version "2". """
    shared_title: str = "title"
    candidates: List[ZenodoRecord] = [
        ZenodoRecord(title=[shared_title], version=[number]) for number in ("1", "2")]
    remove_older_versions(candidates)
    # the list object itself was modified, so it is inspected directly
    self.assertEqual(len(candidates), 1)
    self.assertEqual(candidates[0].version[0], "2")
def test_sort_nodes(self):
    """Sorting graph data that has neither nodes nor links must hand back the very same object."""
    empty_graph: GraphData = GraphData(nodes=[], links=[])
    sorted_graph: GraphData = AnnotationService.sort_nodes(empty_graph)
    # identity, not just equality, is what gets checked here
    self.assertIs(empty_graph, sorted_graph)
def test_start_updater(self):
    """Initializes the corpus list updater.

    Starts the updater thread while ``CorpusService.check_corpus_list_age`` is patched to raise
    an ``OperationalError`` and checks that ``CorpusService.update_corpora`` was not called.
    """
    # NOTE(review): the nesting of the two `with` blocks below is a best guess - confirm
    with patch.object(CorpusService, 'check_corpus_list_age', side_effect=OperationalError("", [], "")):
        # store an UpdateInfo entry for the CTS data before the updater is started
        ui_cts: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name,
                                                  last_modified_time=1.0, created_time=1.0)
        db.session.add(ui_cts)
        DatabaseService.commit()
        with patch.object(CorpusService, 'update_corpora') as update_mock:
            t: Thread = start_updater(Mocks.app_dict[self.class_name].app)
            self.assertIsInstance(t, Thread)
            self.assertTrue(t.is_alive())
            # presumably gives the updater thread time to run once before the mock is inspected
            time.sleep(0.1)
            db.session.query(UpdateInfo).delete()
            assert not update_mock.called
    # NOTE(review): presumably restores the app context for the following tests - confirm
    Mocks.app_dict[self.class_name].app_context.push()
def test_strip_name_spaces(self):
    """Stripping namespaces shortens the tag of a namespaced child element to its local name."""
    qualified_child_tag: str = "{namespace}child"
    root: etree._Element = etree.Element("{namespace}root")
    leaf: etree._Element = etree.Element(qualified_child_tag)
    root.append(leaf)
    with patch("mcserver.app.services.xmlService.hasattr", return_value=False) as has_attr_mock:
        # while the patched hasattr answers False, the tag keeps its full 16 characters
        XMLservice.strip_name_spaces(root)
        self.assertEqual(len(leaf.tag), 16)
        # once it answers True, the tag is cut down to 5 characters (the length of "child")
        has_attr_mock.return_value = True
        XMLservice.strip_name_spaces(root)
        self.assertEqual(len(leaf.tag), 5)
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
class CorpusTestCase(unittest.TestCase):
    """ Test suite for corpus-related functions. """

    def setUp(self):
        """Initializes the testing environment."""
        # remembered so that tearDown can report the running time of the test
        self.start_time = time.time()
        self.class_name: str = str(self.__class__)
        TestHelper.update_flask_app(self.class_name, create_app)

    def tearDown(self):
        """Prints the ID of the finished test together with its running time in seconds."""
        print("{0}: {1} seconds".format(self.id(), "%.2f" % (time.time() - self.start_time)))

    @patch('mcserver.app.services.corpusService.CorpusService.update_corpora')
    def test_check_corpus_list_age(self, mock_update: MagicMock):
        """Checks whether the list of available corpora needs to be updated."""
        # first run: no UpdateInfo in the database, and none is expected afterwards
        db.session.query(UpdateInfo).delete()
        CorpusService.check_corpus_list_age(Mocks.app_dict[self.class_name].app)
        ui_cts: UpdateInfo = DatabaseService.query(
            UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True)
        self.assertIsNone(ui_cts)
        # second run: an outdated entry exists and has to receive a newer modification time
        ui_cts = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, last_modified_time=1.0,
                                      created_time=1.0)
        db.session.add(ui_cts)
        DatabaseService.commit()
        utc_now: datetime = datetime.utcnow()
        CorpusService.check_corpus_list_age(Mocks.app_dict[self.class_name].app)
        ui_cts = DatabaseService.query(
            UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True)
        self.assertGreater(ui_cts.last_modified_time, utc_now.timestamp())
        db.session.query(UpdateInfo).delete()

    def test_extract_custom_corpus_text(self):
        """ Extracts text from the relevant parts of a (custom) corpus. """
        new_text_parts: List[ReferenceableText] = CustomCorpusService.extract_custom_corpus_text(
            Mocks.text_parts, ["", ""], ["", "0"], "", 1, [False, True])
        self.assertEqual(len(new_text_parts), 0)
        new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["", ""], ["", "0"], "", 1)
        self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value)
        new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["1"], ["3"], "")
        self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value)

    def test_get_corpus(self):
        """ Loads the text for a standard corpus from the CTS API or cache. """
        # an empty URN is expected to produce graph data without any nodes
        ar: AnnisResponse = CorpusService.get_corpus("")
        self.assertEqual(len(ar.graph_data.nodes), 0)

    def test_get_custom_corpus_annotations(self):
        """ Retrieves the annotated text for a custom non-PROIEL corpus, e.g. a textbook. """
        # the mocked parser output carries one extra, empty sentence with the ID "3"
        mock_conll: List[TokenList] = Mocks.annotations + [TokenList([], metadata=OrderedDict([("sent_id", "3")]))]
        with patch.object(CustomCorpusService, "get_custom_corpus_text", return_value=Mocks.text_list):
            with patch.object(AnnotationService, "get_udpipe", return_value=Mocks.udpipe_string):
                with patch.object(AnnotationService, "parse_conll_string", return_value=mock_conll):
                    conll: List[TokenList] = CustomCorpusService.get_custom_corpus_annotations(Mocks.urn + "@1-2")
                    self.assertEqual(len(conll), 1)

    def test_get_custom_corpus_reff(self):
        """ Retrieves possible citations for given URN. """
        CustomCorpusService.custom_corpora[4].text_parts = Mocks.text_parts
        # progressively longer prefixes of the custom URN are expected to yield 0, 1 and 2 references
        reff: List[str] = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-15])
        self.assertEqual(len(reff), 0)
        APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-14])
        self.assertEqual(len(reff), 1)
        APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9])
        self.assertEqual(len(reff), 2)
        # same request again, this time without clearing the cache directory in between
        reff = CustomCorpusService.get_custom_corpus_reff(Mocks.urn_custom[:-9])
        self.assertEqual(len(reff), 2)
        APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)
        CustomCorpusService.custom_corpora[4].text_parts = []
        with patch.object(CustomCorpusService, "init_custom_corpus",
                          return_value=CustomCorpusService.custom_corpora[4]):
            source_urn: str = CustomCorpusService.custom_corpora[4].corpus.source_urn
            reff = CustomCorpusService.get_custom_corpus_reff(source_urn)
            self.assertEqual(len(reff), 0)
        APItestCase.clear_folder(Config.REFF_CACHE_DIRECTORY)

    def test_get_custom_corpus_text(self):
        """ Retrieves the text for a custom corpus, e.g. a textbook. """
        text_list: List[ReferenceableText] = CustomCorpusService.get_custom_corpus_text(Mocks.urn)
        self.assertEqual(len(text_list), 0)
        urn: str = Config.CUSTOM_CORPUS_CIC_MARC_URN + ":1-2"
        text_list = CustomCorpusService.get_custom_corpus_text(urn)
        self.assertEqual(len(text_list), 2)
        # empty the text parts of the corpus object obtained from the service again
        cc: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(Config.CUSTOM_CORPUS_CIC_MARC_URN)
        cc.text_parts = []

    def test_init_custom_corpus(self):
        """Adds custom corpora to the corpus list, e.g. the PROIEL corpora."""
        with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
            cc: CustomCorpus = CustomCorpusService.init_custom_corpus(CustomCorpusService.custom_corpora[0])
            self.assertEqual(len(cc.text_parts), 1)
            cc.text_parts = []

    def test_init_custom_corpus_caesar_proiel(self):
        """ Checks the consistency of citations for text parts from messy parts of the treebank. """
        cc: CustomCorpus = CustomCorpusService.init_custom_corpus(CustomCorpusService.custom_corpora[0])
        # the numeric value of the citation has to agree with its label
        target_citation: Citation = cc.text_parts[5].sub_text_parts[19].sub_text_parts[1].citation
        self.assertEqual(target_citation.value, int(target_citation.label))

    def test_init_custom_corpus_commentariolus(self):
        """ Initializes the citation system for Copernicus' Commentariolus. """
        cc: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(
            Config.CUSTOM_CORPUS_COPERNICUS_COMMENTARIOLUS_URN)
        cc = CustomCorpusService.init_custom_corpus(cc)
        self.assertEqual(len(cc.text_parts), 9)
        cc.text_parts = []

    def test_init_custom_corpus_de_revolutionibus(self):
        """ Initializes the citation system for Copernicus' De Revolutionibus Orbium Coelestium. """
        cc: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(
            Config.CUSTOM_CORPUS_COPERNICUS_DE_REVOLUTIONIBUS_URN)
        cc = CustomCorpusService.init_custom_corpus(cc)
        self.assertEqual(len(cc.text_parts[0].sub_text_parts), 15)
        cc.text_parts = []

    def test_init_custom_corpus_pro_marcello(self):
        """ Initializes the citation system for Cicero's Pro M. Marcello. """
        with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
            cc: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(Config.CUSTOM_CORPUS_CIC_MARC_URN)
            cc = CustomCorpusService.init_custom_corpus(cc)
            self.assertEqual(len(cc.text_parts), 34)
            cc.text_parts = []

    def test_process_corpus_data(self):
        """Builds a graph from annotated text data."""
        disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
        AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=Mocks.annotations,
                                             file_name=disk_urn)
        result: dict = CorpusService.process_corpus_data(
            urn=Mocks.urn_custom, annotations=Mocks.annotations, aqls=[Phenomenon.UPOSTAG],
            exercise_type=ExerciseType.cloze, search_phenomena=[Phenomenon.UPOSTAG])
        gd: GraphData = result["graph_data"]
        self.assertEqual(len(gd.nodes), len(Mocks.nodes))
        # cut off the last colon-separated component of the URN to address the corpus as a whole
        urn_parts: List[str] = Mocks.urn_custom.split(":")
        base_urn: str = Mocks.urn_custom.replace(":" + urn_parts[-1], "")
        target_corpus: CustomCorpus = CustomCorpusService.get_custom_corpus_by_urn(base_urn)
        CustomCorpusService.init_custom_corpus(target_corpus)
        text_parts_list: List[ReferenceableText] = CorpusService.load_text_list(Mocks.urn_custom)
        self.assertEqual(len(text_parts_list), 1)

    def test_update_corpora(self):
        """Checks the remote repositories for new corpora to be included in our database."""
        # HTTP GET requests of the CTS retriever are answered by the test helper
        with patch.object(MyCapytain.retrievers.cts5.requests, "get", side_effect=TestHelper.cts_get_mock):
            CorpusService.update_corpora()
            self.assertEqual(len(CorpusService.existing_corpora), 1)
            ec: Corpus = CorpusService.existing_corpora[0]
            ec.title = ""
            DatabaseService.commit()
            TestHelper.add_corpus(CorpusMC.from_dict(source_urn="123"))
            cls: List[CitationLevel] = [ec.citation_level_1, ec.citation_level_2, ec.citation_level_3]
            # updating the corpus with its own (emptied) values must leave the title empty
            CorpusService.update_corpus(ec.title, ec.source_urn, ec.author, cls, ec)
            self.assertFalse(ec.title)
            # a full update is expected to fill in the title again
            CorpusService.update_corpora()
            self.assertTrue(ec.title)
        db.session.query(Corpus).delete()
        db.session.query(UpdateInfo).delete()
class DatabaseTestCase(unittest.TestCase):
    """ Test suite for database-related functions. """

    def setUp(self):
        """Initializes the testing environment."""
        # remembered so that tearDown can report the running time of the test
        self.start_time = time.time()
        self.class_name: str = str(self.__class__)
        TestHelper.update_flask_app(self.class_name, create_app)

    def tearDown(self):
        """Prints the ID of the finished test together with its running time in seconds."""
        print("{0}: {1} seconds".format(self.id(), "%.2f" % (time.time() - self.start_time)))

    def test_app_init(self):
        """Creates an MCserver app in testing mode."""
        CorpusService.init_graphannis_logging()
        self.assertTrue(os.path.exists(Config.GRAPHANNIS_LOG_PATH))
        os.remove(Config.GRAPHANNIS_LOG_PATH)
        # the app is created while the command line arguments are replaced by the mocked test arguments
        with patch.object(sys, 'argv', Mocks.test_args):
            app: Flask = mcserver.get_app()
            self.assertIsInstance(app, Flask)
            self.assertTrue(app.config["TESTING"])
        db.session.query(UpdateInfo).delete()
        Mocks.app_dict[self.class_name].app_context.push()
        db.session.query(Corpus).delete()

    def test_commit(self):
        """Commits the last action to the database and, if it fails, rolls back the current session."""

        def commit():
            raise OperationalError("", [], "")

        with patch.object(mcserver.app.services.databaseService, "db") as mock_db:
            # every commit on the mocked session fails with an OperationalError
            mock_db.session.commit.side_effect = commit
            with self.assertRaises(OperationalError):
                TestHelper.add_corpus(Mocks.corpora[0])
        db.session.query(Corpus).delete()
        db.session.query(UpdateInfo).delete()
        session.make_transient(Mocks.corpora[0])

    def test_create_postgres_database(self):
        """ Creates a new Postgres database. """
        with patch.object(mcserver.app.Session, "execute") as execute_mock:
            with patch.object(mcserver.app.Session, "connection"):
                create_postgres_database(Mocks.app_dict[self.class_name].app, TestingConfig)
                # exactly one statement is expected to be executed on the patched session
                self.assertEqual(execute_mock.call_count, 1)

    def test_init_db_alembic(self):
        """In Docker, the alembic version is not initially written to the database, so we need to set it manually."""
        # precondition: the alembic table does not exist yet
        self.assertEqual(inspect(db.engine).has_table(Config.DATABASE_TABLE_ALEMBIC), False)
        with patch.object(mcserver.app.services.databaseService, "flask_migrate") as migrate_mock:
            migrate_mock.stamp.return_value = MagicMock()
            migrate_mock.upgrade.return_value = MagicMock()
            migrate_mock.current.return_value = MagicMock()
            DatabaseService.init_db_alembic(Mocks.app_dict[self.class_name].app)
            self.assertEqual(migrate_mock.stamp.call_count, 1)

    def test_init_db_corpus(self):
        """Initializes the corpus table."""
        db.session.query(Corpus).delete()
        cc: CustomCorpus = CustomCorpusService.custom_corpora[0]
        # store a mock corpus under the source URN of the first custom corpus ...
        old_corpus: Corpus = Mocks.corpora[0]
        old_corpus.source_urn = cc.corpus.source_urn
        TestHelper.add_corpus(old_corpus)
        del old_corpus
        # ... so that, after initialization, the stored entry must carry the custom corpus' title
        CorpusService.init_corpora()
        corpus: Corpus = DatabaseService.query(
            Corpus, filter_by=dict(source_urn=cc.corpus.source_urn), first=True)
        self.assertEqual(corpus.title, cc.corpus.title)
        db.session.query(Corpus).delete()
        db.session.query(UpdateInfo).delete()

    def test_map_exercise_data_to_database(self):
        """Maps exercise data to the database and saves it for later access."""
        ui_exercises: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.exercise_list.name,
                                                        last_modified_time=1.0, created_time=1.0)
        db.session.add(ui_exercises)
        DatabaseService.commit()
        exercise_expected: Exercise = Mocks.exercise
        exercise: Exercise = map_exercise_data_to_database(
            solutions=[Solution.from_dict(x) for x in json.loads(exercise_expected.solutions)],
            exercise_data=ExerciseData(json_dict=Mocks.exercise_data.serialize()),
            instructions=exercise_expected.instructions, exercise_type=exercise_expected.exercise_type,
            exercise_type_translation=exercise_expected.exercise_type_translation, xml_guid=exercise_expected.eid,
            conll=exercise_expected.conll, correct_feedback=exercise_expected.correct_feedback,
            partially_correct_feedback=exercise_expected.partially_correct_feedback, urn=Mocks.urn_custom,
            incorrect_feedback=exercise_expected.incorrect_feedback, search_values=exercise_expected.search_values,
            general_feedback=exercise_expected.general_feedback, work_author=exercise_expected.work_author,
            work_title=exercise_expected.work_title, language=exercise_expected.language)
        # both lists enumerate the same attributes in the same order
        expected_values: List[str] = [
            exercise_expected.conll, exercise_expected.general_feedback, exercise_expected.incorrect_feedback,
            exercise_expected.search_values, exercise_expected.partially_correct_feedback,
            exercise_expected.correct_feedback, exercise_expected.instructions,
            exercise_expected.exercise_type_translation, exercise_expected.exercise_type, exercise_expected.solutions,
            exercise_expected.eid]
        actual_values: List[str] = [
            exercise.conll, exercise.general_feedback, exercise.incorrect_feedback, exercise.search_values,
            exercise.partially_correct_feedback, exercise.correct_feedback, exercise.instructions,
            exercise.exercise_type_translation, exercise.exercise_type, exercise.solutions, exercise.eid]
        self.assertEqual(expected_values, actual_values)
        # the mapped exercise must also be retrievable from the database
        exercise_from_db: Exercise = DatabaseService.query(Exercise, first=True)
        self.assertEqual(exercise, exercise_from_db)
        db.session.query(Exercise).delete()
        db.session.query(UpdateInfo).delete()
        session.make_transient(Mocks.exercise)

    def test_update_exercises(self):
        """Deletes old exercises."""
        # two exercises are stored; the first one has no solutions and no text complexity
        exercises: List[Exercise] = [
            ExerciseMC.from_dict(
                last_access_time=datetime.utcnow().timestamp(), urn="urn", solutions="[]", eid="eid1"),
            ExerciseMC.from_dict(last_access_time=datetime.utcnow().timestamp(), urn="urn",
                                 solutions=json.dumps([Solution().to_dict()]), text_complexity=0.0, eid="eid2")]
        db.session.add_all(exercises)
        DatabaseService.commit()
        with patch.object(mcserver.app.services.textComplexityService.TextComplexityService, "text_complexity",
                          return_value=Mocks.text_complexity):
            with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
                ExerciseService.update_exercises()
                # a single exercise is expected to remain, with a text complexity of 54.53
                exercises = DatabaseService.query(Exercise)
                self.assertEqual(len(exercises), 1)
                self.assertEqual(exercises[0].text_complexity, 54.53)
        db.session.query(Exercise).delete()
if __name__ == '__main__':
    # Script entry point: collect all test cases of this module, run them, then remove
    # the data that the run left on disk.
    runner: unittest.TextTestRunner = unittest.TextTestRunner()
    suite: unittest.TestSuite = unittest.TestSuite()
    for test_case in (APItestCase, CommonTestCase, CorpusTestCase, DatabaseTestCase):
        suite.addTests(TestLoader().loadTestsFromTestCase(test_case))
    # the tests have to be executed before the cleanup below removes their data
    runner.run(suite)
    if os.path.exists(Config.GRAPH_DATABASE_DIR_BASE):
        shutil.rmtree(Config.GRAPH_DATABASE_DIR_BASE)
    # delete the SQLITE database to have a clean start next time
    # (the slice drops the first 10 characters of the URI, presumably the "sqlite:///" prefix - confirm)
    database_path: str = TestingConfig.SQLALCHEMY_DATABASE_URI[10:]
    if os.path.exists(database_path):
        os.remove(database_path)