diff --git a/mc_backend/mcserver/app/models.py b/mc_backend/mcserver/app/models.py
index 85cb62a43e878918dc3d05ceccd4a9cecc0f5d53..9fa1d47d3341adfc8a069f590ac7726602ebd36a 100644
--- a/mc_backend/mcserver/app/models.py
+++ b/mc_backend/mcserver/app/models.py
@@ -458,3 +458,9 @@ class CustomCorpus:
         self.corpus = corpus
         self.file_path = file_path
         self.text_parts: List[TextPart] = [] if text_parts is None else text_parts
+
+
+class ReferenceableText:
+    def __init__(self, text: str = "", urn: str = ""):
+        self.text: str = text
+        self.urn: str = urn
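Note on the new model: `ReferenceableText` replaces the bare `(urn, text)` tuples that used to flow through the services below, so call sites read `x.text`/`x.urn` instead of `x[1]`/`x[0]`. A minimal usage sketch (the URN and Latin text here are illustrative values, not taken from the codebase):

```python
from mcserver.app.models import ReferenceableText

# before this change: positional tuple access, easy to mix up
old_entry = ("urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1",
             "Gallia est omnis divisa in partes tres.")
urn, text = old_entry[0], old_entry[1]

# after: the same data with self-documenting attribute access
entry = ReferenceableText(text="Gallia est omnis divisa in partes tres.",
                          urn="urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1")
assert (entry.urn, entry.text) == old_entry
```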
""" current_text_list_index: int = 0 current_start_index: int = 0 for sent in annotations: first_token: str = sent.tokens[0]["form"] - new_index: int = text_list[current_text_list_index][1].find(first_token, current_start_index) + # check if the text from the text list contains the first token from the annotations + new_index: int = text_list[current_text_list_index].text.find(first_token, current_start_index) if new_index > -1: + # the text from the text list contains multiple sentences from the annotations + # thus, increment only the search index, not the text list index current_start_index = new_index + len(first_token) elif not first_token[-1].isalpha(): - # account for cases where the parser failed to tokenize correctly, thus appending punctuation to the - # end of a regular word - new_index = text_list[current_text_list_index][1].find(first_token[:-1], current_start_index) + # account for cases where the parser failed to tokenize correctly + # this is detected by looking for punctuation appended to the end of a regular word + new_index = text_list[current_text_list_index].text.find(first_token[:-1], current_start_index) if new_index < 0: continue else: current_start_index = new_index + len(first_token) else: - while new_index < 0 and len(text_list) > current_text_list_index: + while new_index < 0 and len(text_list) > current_text_list_index + 1: current_text_list_index += 1 current_start_index = 0 - new_index = text_list[current_text_list_index][1].find(first_token, current_start_index) + new_index = text_list[current_text_list_index].text.find(first_token, current_start_index) current_start_index = new_index + len(first_token) - sent.metadata["urn"] = text_list[current_text_list_index][0] + # assign the URN from the corresponding text list entry to the annotations + sent.metadata["urn"] = text_list[current_text_list_index].urn @staticmethod def get_citation_label(text_parts: List[TextPart], citation_values: List[int]) -> str: @@ -175,46 +216,16 @@ class AnnotationService: # delete any existing corpus with this name cs.delete_corpus(file_name) # currently there is only one document because texts are their own corpus - doc_name = 'doc1' + doc_name: str = 'doc1' with GraphUpdate() as g: - doc_path = corpus_name + '/' + doc_name + doc_path: str = corpus_name + '/' + doc_name # create a corpus and document node # both nodes belong to the corpus graph, not the annotation graph g.add_node(node_name=corpus_name, node_type="corpus") g.add_node(node_name=doc_path, node_type="corpus") # the document is part of the corpus g.add_edge(doc_path, corpus_name, 'annis', 'PartOf', '') - tok_before = None - for tokenList in conll: - conllid_to_annisid = dict() - # create the sentence ID - sentence_id: int = tokenList.metadata["sent_id"] - sentence_node_name: str = f"{tokenList.metadata['urn']}/{doc_name}#sent{sentence_id}" - # add nodes - for tok in tokenList.tokens: - token_id: int = tok["id"] - # map CONLL to graphANNIS - tok_id_final = sentence_node_name + "tok{0}".format(token_id) - conllid_to_annisid[tok['id']] = tok_id_final - AnnotationService.map_token(tok, tok_id_final, g) - # a token belongs to its document - g.add_edge(tok_id_final, doc_path, 'annis', 'PartOf', '') - if tok_before is not None: - # add ordering edge between the tokens - g.add_edge(tok_before, tok_id_final, 'annis', 'Ordering', '') - # remember the current token for the next iteration - tok_before = tok_id_final - # add pointing relations - for tok in tokenList.tokens: - if 'head' in tok: - if tok['head'] != 0: - 
diff --git a/mc_backend/mcserver/app/services/corpusService.py b/mc_backend/mcserver/app/services/corpusService.py
index 554b74e6f917951c981517a7bab008c5c37ef670..ff0170e0db74675a18dff5ad93518a6960b68975 100644
--- a/mc_backend/mcserver/app/services/corpusService.py
+++ b/mc_backend/mcserver/app/services/corpusService.py
@@ -18,7 +18,7 @@ from requests import HTTPError
 from sqlalchemy.exc import OperationalError
 from mcserver.app import db
 from mcserver.app.models import CitationLevel, GraphData, Solution, ExerciseType, Phenomenon, AnnisResponse, CorpusMC, \
-    make_solution_element_from_salt_id, FrequencyItem, ResourceType
+    make_solution_element_from_salt_id, FrequencyItem, ResourceType, ReferenceableText
 from mcserver.app.services import AnnotationService, XMLservice, TextService, FileService, FrequencyService, \
     CustomCorpusService, DatabaseService
 from mcserver.config import Config
@@ -182,8 +182,8 @@ class CorpusService:
         try:
             mdg = Config.CORPUS_STORAGE_MANAGER.subcorpus_graph(cts_urn_raw_disk, [doc_id])
         except (NoSuchCorpus, GraphANNISException):
-            text_list: List[Tuple[str, str]] = CorpusService.load_text_list(cts_urn_raw=cts_urn_raw)
-            raw_text: str = TextService.strip_whitespace(" ".join([x[1] for x in text_list]))
+            text_list: List[ReferenceableText] = CorpusService.load_text_list(cts_urn_raw=cts_urn_raw)
+            raw_text: str = TextService.strip_whitespace(" ".join([x.text for x in text_list]))
             annotations_conll: str = AnnotationService.get_udpipe(raw_text)
             # parse CONLL and add root dependencies as separate node annotations
             annotations = AnnotationService.parse_conll_string(annotations_conll)
@@ -355,7 +355,7 @@ class CorpusService:
         return maybe_urn.startswith("urn:")
 
     @staticmethod
-    def load_text_list(cts_urn_raw: str) -> List[Tuple[str, str]]:
+    def load_text_list(cts_urn_raw: str) -> List[ReferenceableText]:
         """ Loads the text list for a new corpus. """
         if CustomCorpusService.is_custom_corpus_urn(cts_urn_raw):
             # this is a custom corpus, e.g. the VIVA textbook
""" - text_list: List[Tuple[str, str]] = [] + text_list: List[ReferenceableText] = [] nxt: callable = CustomCorpusService.extract_custom_corpus_text_next_level for rtp in relevant_text_parts: new_urn: str = ("." if current_idx else ":").join([base_urn, str(rtp.citation.value)]) @@ -80,11 +80,11 @@ class CustomCorpusService: @staticmethod def extract_custom_corpus_text_next_level(rtp: TextPart, start_parts: List[str], end_parts: List[str], new_urn: str, - text_list: List[Tuple[str, str]], current_idx: int = 0, + text_list: List[ReferenceableText], current_idx: int = 0, consider_start: List[bool] = None) -> None: """ Extracts text from the next level of relevant text parts for a (custom corpus). """ if current_idx == len(start_parts) - 1: - text_list.append((new_urn, rtp.text_value)) + text_list.append(ReferenceableText(rtp.text_value, new_urn)) else: current_idx += 1 text_list += CustomCorpusService.extract_custom_corpus_text(rtp.sub_text_parts, start_parts, @@ -97,8 +97,8 @@ class CustomCorpusService: if AnnotationService.has_urn_sentence_range(urn): urn_split = urn.split("@") urn = urn_split[0] - text_list: List[Tuple[str, str]] = CustomCorpusService.get_custom_corpus_text(urn) - annotations_conll: str = AnnotationService.get_udpipe(" ".join(x[1] for x in text_list)) + text_list: List[ReferenceableText] = CustomCorpusService.get_custom_corpus_text(urn) + annotations_conll: str = AnnotationService.get_udpipe(" ".join(x.text for x in text_list)) conll: List[TokenList] = AnnotationService.parse_conll_string(annotations_conll) if len(urn_split): sentence_range: List[int] = list(map(lambda x: int(x), urn_split[1].split("-"))) @@ -159,7 +159,7 @@ class CustomCorpusService: return [] @staticmethod - def get_custom_corpus_text(urn: str) -> List[Tuple[str, str]]: + def get_custom_corpus_text(urn: str) -> List[ReferenceableText]: """ Retrieves the text for a custom corpus, e.g. a textbook. """ urn_parts: List[str] = urn.split(":") base_urn: str = urn.replace(":" + urn_parts[-1], "") @@ -354,7 +354,7 @@ class CustomCorpusService: @staticmethod def prepare_custom_corpus_text_next_level(rtp: TextPart, start_parts: List[str], end_parts: List[str], new_urn: str, - text_list: List[Tuple[str, str]], nxt: callable, + text_list: List[ReferenceableText], nxt: callable, current_idx: int = 0) -> None: """ Identifies possible candidates and relevant URN parts for the next text level. 
""" if int(start_parts[current_idx]) < rtp.citation.value < int(end_parts[current_idx]): diff --git a/mc_backend/mcserver/app/services/xmlService.py b/mc_backend/mcserver/app/services/xmlService.py index e364160d21b2b95d3a2634b2f4c79db2d5acf12e..fcc079f01dbf8e13b3692069eb92c17a96a99ade 100644 --- a/mc_backend/mcserver/app/services/xmlService.py +++ b/mc_backend/mcserver/app/services/xmlService.py @@ -6,7 +6,7 @@ from lxml import etree, objectify from lxml.etree import _ElementUnicodeResult from collections import OrderedDict -from mcserver.app.models import ExerciseType, FileType, Solution +from mcserver.app.models import ExerciseType, FileType, Solution, ReferenceableText from mcserver.app.services import TextService from mcserver.models_auto import Exercise @@ -101,9 +101,9 @@ class XMLservice: return TextService.strip_whitespace(" ".join([y["form"] for x in conll for y in x])) @staticmethod - def get_text_parts_by_urn(cts_urn_raw: str, xml: etree._Element) -> List[Tuple[str, str]]: + def get_text_parts_by_urn(cts_urn_raw: str, xml: etree._Element) -> List[ReferenceableText]: """ Parses an XML file for the various text parts and maps them to their respective URN. """ - text_list: List[Tuple[str, str]] = [] + text_list: List[ReferenceableText] = [] base_urn: str = ":".join(cts_urn_raw.split(":")[:-1]) target_elements_string: str = "*[@n]" level1_parts: List[etree._Element] = xml.xpath( @@ -120,15 +120,15 @@ class XMLservice: l3p_value: _ElementUnicodeResult = l3p.xpath("@n")[0] text_values: List[str] = l3p.xpath(".//text()") urn: str = f"{base_urn}:{str(l1p_value)}.{str(l2p_value)}.{str(l3p_value)}" - text_list.append((urn, " ".join(" ".join(text_values).split()))) + text_list.append(ReferenceableText(" ".join(" ".join(text_values).split()), urn)) else: text_values: List[str] = l2p.xpath(".//text()") urn: str = f"{base_urn}:{str(l1p_value)}.{str(l2p_value)}" - text_list.append((urn, " ".join(" ".join(text_values).split()))) + text_list.append(ReferenceableText(" ".join(" ".join(text_values).split()), urn)) else: text_values: List[str] = l1p.xpath(".//text()") urn: str = f"{base_urn}:{str(l1p_value)}" - text_list.append((urn, " ".join(" ".join(text_values).split()))) + text_list.append(ReferenceableText(" ".join(" ".join(text_values).split()), urn)) return text_list @staticmethod diff --git a/mc_backend/mocks.py b/mc_backend/mocks.py index 5fe577cf03a2462f3f229299d5e43286c88a2d79..943fb42c359a1686a21164e0276247f6208ffc30 100644 --- a/mc_backend/mocks.py +++ b/mc_backend/mocks.py @@ -20,7 +20,7 @@ from mcserver import Config, TestingConfig from mcserver.app import db, shutdown_session from mcserver.app.models import Phenomenon, PartOfSpeech, CitationLevel, ExerciseData, GraphData, \ LinkMC, NodeMC, Language, Dependency, Case, AnnisResponse, Solution, TextPart, Citation, ExerciseMC, CorpusMC, \ - SolutionElement + SolutionElement, ReferenceableText from mcserver.app.services import AnnotationService, CustomCorpusService, TextService, DatabaseService from mcserver.models_auto import Corpus, Exercise, UpdateInfo @@ -759,8 +759,9 @@ class Mocks: subgraph_json: str = 
'{"exercise_id":"","exercise_type":"","frequency_analysis":null,"graph_data":{"directed":true,"graph":{},"links":[],"multigraph":true,"nodes":[{"annis_node_name":"urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1/doc1#sent1tok3","annis_node_type":"node","annis_tok":"Galli","annis_type":"node","id":"salt:/urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.1/doc1#sent1tok3","is_oov":null,"udep_lemma":"Gallo","udep_upostag":"VERB","udep_xpostag":"L3|modQ|tem1|stAC","udep_feats":"Tense=Pres|VerbForm=Inf|Voice=Pass","solution":null}]},"solutions":[],"text_complexity":null,"uri":""}' test_args: List[str] = ["tests.py", "-test"] text_complexity_json_string: str = '{"all":54.53,"avg_w_len":5.79,"avg_w_per_sent":17.33,"lex_den":0.73,"n_abl_abs":0,"n_clause":1,"n_gerund":1,"n_inf":1,"n_part":1,"n_punct":3,"n_sent":3,"n_subclause":0,"n_types":48,"n_w":52,"pos":11}' - text_list: List[Tuple[str, str]] = [("urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1", raw_text.split(".")[0]), - ("urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.2", raw_text.split(".")[1])] + text_list: List[ReferenceableText] = [ + ReferenceableText(raw_text.split(".")[0], "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1"), + ReferenceableText(raw_text.split(".")[1], "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.2")] text_parts: List[TextPart] = [ TextPart(citation=Citation(level=CitationLevel.book, label="2", value=2), text_value="text", sub_text_parts=[ TextPart( @@ -769,7 +770,7 @@ class Mocks: TextPart(citation=Citation(level=CitationLevel.section, label="1", value=1), text_value="subtext"), TextPart(citation=Citation(level=CitationLevel.section, label="2", value=2))])])] - udpipe_string: str = "# newpar\n# sent_id = 1\n# text = Caesar fortis est.\n1\tCaesar\tCaeso\tVERB\tC1|grn1|casA|gen1|stAN\tCase=Nom|Degree=Pos|Gender=Masc|Number=Sing\t2\tcsubj\t_\t_\n2\tfortis\tfortis\tADJ\tC1|grn1|casA|gen1|stAN\tCase=Nom|Degree=Pos|Gender=Masc|Number=Sing\t0\troot\t_\t_\n3\test\tsum\tAUX\tN3|modA|tem1|gen6|stAV\tMood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act\t2\tcop\t_\tSpaceAfter=No\n4\t.\t.\tPUNCT\tPunc\t_\t2\tpunct\t_\t_\n\n# sent_id = 2\n# text = Galli moriuntur.\n1\tGalli\tGallus\tPRON\tF1|grn1|casJ|gen1|stPD\tCase=Nom|Degree=Pos|Gender=Masc|Number=Plur|PronType=Dem\t2\tnsubj:pass\t_\t_\n2\tmoriuntur\tmorior\tVERB\tL3|modJ|tem1|gen9|stAV\tMood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|Voice=Pass\t0\troot\t_\tSpaceAfter=No\n3\t.\t.\tPUNCT\tPunc\t_\t2\tpunct\t_\tSpacesAfter=\\n\n\n" + udpipe_string: str = "# newpar\n# sent_id = 1\n# text = Caesar fortis est. 
Galli moriuntur.\n1\tCaesar\tCaesar\tPROPN\tNe\tCase=Nom|Gender=Masc|Number=Sing\t4\tnsubj\t_\t_\n2\tfortis\tfortis\tADJ\tA-\tCase=Nom|Degree=Pos|Gender=Masc|Number=Sing\t3\tamod\t_\t_\n3\test.\test.\tX\tF-\t_\t4\tobj\t_\t_\n4\tGalli\tGalli\tNOUN\tNb\tCase=Nom|Gender=Masc|Number=Plur\t0\troot\t_\t_\n5\tmoriuntur.\tgreek.expression\tNOUN\tNb\t_\t3\tconj\t_\tSpacesAfter=\\n\n\n" urn: str = "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1-1.1.2" urn_custom: str = f"{CustomCorpusService.custom_corpora[4].corpus.source_urn}:2.23.1-2.23.1" xapi_json_string: str = '{"0":{"actor":{"account":{"name":"9a7eef78-b0b4-471d-b451-e47c9b20d231"},"objectType":"Agent"},"verb":{"id":"http://adlnet.gov/expapi/verbs/answered","display":{"en-US":"answered"}},"object":{"objectType":"Activity","definition":{"extensions":{"http://h5p.org/x-api/h5p-local-content-id":1},"interactionType":"fill-in","type":"http://adlnet.gov/expapi/activities/cmi.interaction","description":{"en-US":"<p>Matching: Assign the matching elements to each other!</p><br/>divisa __________<br/>dividit __________<br/>"},"correctResponsesPattern":["partes[,]Belgis"]}},"context":{"contextActivities":{"category":[{"id":"http://h5p.org/libraries/H5P.DragText-1.8","objectType":"Activity"}]}},"result":{"response":"Belgis[,]","score":{"min":0,"raw":0,"max":2,"scaled":0},"duration":"PT4.12S","completion":true}}}' diff --git a/mc_backend/tests.py b/mc_backend/tests.py index 732e9d226ca6f32ccab4829baf567b5d42f11336..7c6ff6a8029cd9494c1c417d9f0491c990f5adc4 100644 --- a/mc_backend/tests.py +++ b/mc_backend/tests.py @@ -23,6 +23,7 @@ from typing import Dict, List, Tuple, Type, Any from conllu import TokenList from flask import Flask from gensim.models import Word2Vec +from graphannis.graph import GraphUpdate from lxml import etree from networkx import MultiDiGraph, Graph from requests import HTTPError @@ -38,7 +39,7 @@ from mcserver.app.api.exerciseAPI import map_exercise_data_to_database from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseData, \ NodeMC, LinkMC, GraphData, Phenomenon, CustomCorpus, AnnisResponse, Solution, DownloadableFile, Language, \ VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, TextComplexity, Dependency, PartOfSpeech, \ - Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence + Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence, ReferenceableText from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, DatabaseService, \ XMLservice, TextService, FrequencyService, ExerciseService from mcserver.config import TestingConfig, Config @@ -802,11 +803,11 @@ class CsmTestCase(unittest.TestCase): with patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list): with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text): conll = CorpusService.get_annotations_from_string(Mocks.urn) - self.assertEqual(len(conll[0]), 4) + self.assertEqual(len(conll[0]), 5) mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn) - self.assertEqual(len(mdg.nodes), 7) + self.assertEqual(len(mdg.nodes), 5) mdg = CorpusService.get_graph(f"{Mocks.urn}@1-1") - self.assertEqual(len(mdg.nodes), 7) + self.assertEqual(len(mdg.nodes), 5) with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations): conll = CorpusService.get_annotations_from_string(Mocks.urn_custom) self.assertEqual(len(conll[0]), 6) @@ -848,6 +849,19 @@ class 
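The updated `udpipe_string` mock deliberately contains mis-tokenized forms (`est.`, `moriuntur.`) where the parser glued the sentence-final period onto the word. That is exactly the case `add_urn_to_sentences` covers with its `first_token[-1].isalpha()` fallback; a minimal sketch of that retry, with values taken from the test setup and illustrative variable names:

```python
part_text = "Caesar"      # text part content from the text list
first_token = "Caesar."   # the parser appended punctuation to the word
idx = part_text.find(first_token)            # -1, no exact match
if idx < 0 and not first_token[-1].isalpha():
    idx = part_text.find(first_token[:-1])   # 0, matches without the period
assert idx == 0
```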
diff --git a/mc_backend/tests.py b/mc_backend/tests.py
index 732e9d226ca6f32ccab4829baf567b5d42f11336..7c6ff6a8029cd9494c1c417d9f0491c990f5adc4 100644
--- a/mc_backend/tests.py
+++ b/mc_backend/tests.py
@@ -23,6 +23,7 @@ from typing import Dict, List, Tuple, Type, Any
 from conllu import TokenList
 from flask import Flask
 from gensim.models import Word2Vec
+from graphannis.graph import GraphUpdate
 from lxml import etree
 from networkx import MultiDiGraph, Graph
 from requests import HTTPError
@@ -38,7 +39,7 @@ from mcserver.app.api.exerciseAPI import map_exercise_data_to_database
 from mcserver.app.models import ResourceType, FileType, ExerciseType, ExerciseData, \
     NodeMC, LinkMC, GraphData, Phenomenon, CustomCorpus, AnnisResponse, Solution, DownloadableFile, Language, \
     VocabularyCorpus, TextComplexityMeasure, CitationLevel, FrequencyItem, TextComplexity, Dependency, PartOfSpeech, \
-    Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence
+    Choice, XapiStatement, ExerciseMC, CorpusMC, make_solution_element_from_salt_id, Sentence, ReferenceableText
 from mcserver.app.services import AnnotationService, CorpusService, FileService, CustomCorpusService, DatabaseService, \
     XMLservice, TextService, FrequencyService, ExerciseService
 from mcserver.config import TestingConfig, Config
@@ -802,11 +803,11 @@ class CsmTestCase(unittest.TestCase):
         with patch.object(CorpusService, "load_text_list", return_value=Mocks.text_list):
             with patch.object(CorpusService, "get_raw_text", return_value=Mocks.raw_text):
                 conll = CorpusService.get_annotations_from_string(Mocks.urn)
-                self.assertEqual(len(conll[0]), 4)
+                self.assertEqual(len(conll[0]), 5)
         mdg: MultiDiGraph = CorpusService.get_graph(Mocks.urn)
-        self.assertEqual(len(mdg.nodes), 7)
+        self.assertEqual(len(mdg.nodes), 5)
         mdg = CorpusService.get_graph(f"{Mocks.urn}@1-1")
-        self.assertEqual(len(mdg.nodes), 7)
+        self.assertEqual(len(mdg.nodes), 5)
         with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
             conll = CorpusService.get_annotations_from_string(Mocks.urn_custom)
             self.assertEqual(len(conll[0]), 6)
@@ -848,6 +849,19 @@ class CsmTestCase(unittest.TestCase):
             db.session.query(UpdateInfo).delete()
         assert not update_mock.called
 
+    def test_map_conll_to_graph(self):
+        """ Saves an annotated corpus in CONLL format to the ANNIS corpus storage. """
+        conll: List[TokenList] = Mocks.annotations + copy.deepcopy(Mocks.annotations)
+        conll[1].metadata = dict(sent_id="2")
+        disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
+        AnnotationService.map_conll_to_graph(corpus_name=Mocks.urn_custom, conll=conll,
+                                             cs=Config.CORPUS_STORAGE_MANAGER, file_name=disk_urn)
+        result: dict = CorpusService.process_corpus_data(
+            urn=Mocks.urn_custom, annotations=conll, aqls=["tok"], exercise_type=ExerciseType.cloze,
+            search_phenomena=[Phenomenon.UPOSTAG])
+        gd: GraphData = AnnotationService.map_graph_data(result["graph_data_raw"])
+        self.assertEqual(gd.nodes[-1].id.split("/")[0], gd.nodes[0].id.split("/")[0])
+
     def test_process_corpus_data(self):
         """Builds a graph from annotated text data."""
         disk_urn: str = AnnotationService.get_disk_urn(Mocks.urn_custom)
@@ -863,7 +877,7 @@ class CsmTestCase(unittest.TestCase):
         target_corpus: CustomCorpus = next(
             (x for x in CustomCorpusService.custom_corpora if x.corpus.source_urn == base_urn), None)
         CustomCorpusService.init_custom_corpus(target_corpus)
-        text_parts_list: List[Tuple[str, str]] = CorpusService.load_text_list(Mocks.urn_custom)
+        text_parts_list: List[ReferenceableText] = CorpusService.load_text_list(Mocks.urn_custom)
         self.assertEqual(len(text_parts_list), 1)
 
     def test_run_app(self):
@@ -905,16 +919,20 @@ class CommonTestCase(unittest.TestCase):
     def test_add_urn_to_sentences(self):
         """ Adds the relevant URN for every annotated sentence. """
         conll: List[TokenList] = copy.deepcopy(Mocks.annotations)
-        text_list: List[Tuple[str, str]] = [(Mocks.urn, conll[0].tokens[0]["form"]), (Mocks.urn_custom, "")]
+        text_list: List[ReferenceableText] = [
+            ReferenceableText(conll[0].tokens[0]["form"], Mocks.urn),
+            ReferenceableText("", Mocks.urn_custom), ReferenceableText(conll[0].tokens[0]["form"], Mocks.urn_custom)]
         conll[0].tokens[0]["form"] += "."
         conll.append(TokenList(tokens=[
             {"id": 1, "form": "Caesar.", "lemma": "Caeso", "upostag": "VERB", "xpostag": "L3|modJ|tem3|gen4|stAV",
              "feats": {"Mood": "Ind", "Number": "Sing", "Person": "1", "Tense": "Fut", "VerbForm": "Fin",
                        "Voice": "Pass"}, "head": 0, "deprel": "root", "deps": None, "misc": {"ref": "1.1"}}],
            metadata=OrderedDict([("sent_id", "2"), ("urn", "")])))
+        conll += copy.deepcopy(Mocks.annotations)
         AnnotationService.add_urn_to_sentences(text_list, conll)
         self.assertEqual(conll[0].metadata["urn"], Mocks.urn)
         self.assertEqual(conll[1].metadata["urn"], "")
+        self.assertEqual(conll[2].metadata["urn"], Mocks.urn_custom)
 
     def test_create_xml_string(self):
         """Exports the exercise data to the Moodle XML format. See https://docs.moodle.org/35/en/Moodle_XML_format ."""
@@ -937,13 +955,13 @@ class CommonTestCase(unittest.TestCase):
 
     def test_extract_custom_corpus_text(self):
         """ Extracts text from the relevant parts of a (custom) corpus. """
-        new_text_parts: List[Tuple[str, str]] = CustomCorpusService.extract_custom_corpus_text(
+        new_text_parts: List[ReferenceableText] = CustomCorpusService.extract_custom_corpus_text(
             Mocks.text_parts, ["", ""], ["", "0"], "", 1, [False, True])
         self.assertEqual(len(new_text_parts), 0)
         new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["", ""], ["", "0"], "", 1)
-        self.assertEqual(new_text_parts[0][1], Mocks.text_parts[0].text_value)
+        self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value)
         new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["1"], ["3"], "")
-        self.assertEqual(new_text_parts[0][1], Mocks.text_parts[0].text_value)
+        self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value)
 
     def test_get_concept_network(self):
         """Extracts a network of words from vector data in an AI model."""
""" - new_text_parts: List[Tuple[str, str]] = CustomCorpusService.extract_custom_corpus_text( + new_text_parts: List[ReferenceableText] = CustomCorpusService.extract_custom_corpus_text( Mocks.text_parts, ["", ""], ["", "0"], "", 1, [False, True]) self.assertEqual(len(new_text_parts), 0) new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["", ""], ["", "0"], "", 1) - self.assertEqual(new_text_parts[0][1], Mocks.text_parts[0].text_value) + self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) new_text_parts = CustomCorpusService.extract_custom_corpus_text(Mocks.text_parts, ["1"], ["3"], "") - self.assertEqual(new_text_parts[0][1], Mocks.text_parts[0].text_value) + self.assertEqual(new_text_parts[0].text, Mocks.text_parts[0].text_value) def test_get_concept_network(self): """Extracts a network of words from vector data in an AI model.""" @@ -984,7 +1002,7 @@ class CommonTestCase(unittest.TestCase): def test_get_custom_corpus_text(self): """ Retrieves the text for a custom corpus, e.g. a textbook. """ - text_list: List[Tuple[str, str]] = CustomCorpusService.get_custom_corpus_text(Mocks.urn) + text_list: List[ReferenceableText] = CustomCorpusService.get_custom_corpus_text(Mocks.urn) self.assertEqual(len(text_list), 0) def test_get_pdf_html_string(self): @@ -1034,15 +1052,15 @@ class CommonTestCase(unittest.TestCase): self.assertEqual(len(conll), 3) cc: CustomCorpus = CustomCorpusService.custom_corpora[-1] urn: str = cc.corpus.source_urn + ":1.1-1.2" - conll = CustomCorpusService.get_treebank_sub_annotations(urn, [], cc) + conll: List[TokenList] = CustomCorpusService.get_treebank_sub_annotations(urn, [], cc) self.assertEqual(len(cc.text_parts), 2) def test_get_udpipe(self): """Annotates a single text with UdPipe. The beginning of the CONLL has to be left out because it contains the randomly generated temp file path and thus cannot be predicted exactly.""" - text = "Caesar fortis est. Galli moriuntur." - conll = AnnotationService.get_udpipe(text) - self.assertIn(Mocks.udpipe_string, conll) + text: str = "Caesar fortis est. Galli moriuntur." + conll_string: str = AnnotationService.get_udpipe(text) + self.assertIn(Mocks.udpipe_string, conll_string) def test_init_custom_corpus(self): """Adds custom corpora to the corpus list, e.g. the PROIEL corpora.""" @@ -1094,7 +1112,7 @@ class CommonTestCase(unittest.TestCase): """ Loads the text list for a new corpus. 
""" with patch.object(mcserver.app.services.corpusService.HttpCtsRetriever, 'getPassage', return_value=Mocks.cts_passage_xml) as get_passage_mock: - text_parts: List[Tuple[str, str]] = CorpusService.load_text_list(Mocks.urn) + text_parts: List[ReferenceableText] = CorpusService.load_text_list(Mocks.urn) self.assertEqual(len(text_parts), 2) get_passage_mock.return_value = Mocks.cts_passage_xml_2_levels text_parts = CorpusService.load_text_list(Mocks.urn[:-8] + "-1.1") @@ -1103,7 +1121,7 @@ class CommonTestCase(unittest.TestCase): text_parts = CorpusService.load_text_list(Mocks.urn[:-10] + "-3") self.assertEqual(len(text_parts), 3) get_passage_mock.side_effect = HTTPError() - text_parts: List[Tuple[str, str]] = CorpusService.load_text_list(Mocks.urn) + text_parts = CorpusService.load_text_list(Mocks.urn) self.assertEqual(text_parts, []) def test_make_docx_file(self): diff --git a/mc_frontend/src/app/app.component.html b/mc_frontend/src/app/app.component.html index ebd47e8340fba3174d9f92286012cfc6a22cd331..17ded48bf0050ed8bbc7f4c29ea2e68b0a850420 100644 --- a/mc_frontend/src/app/app.component.html +++ b/mc_frontend/src/app/app.component.html @@ -90,6 +90,13 @@ </ion-grid> </ion-row> </ion-grid> + <ion-grid> + <ion-row> + <ion-col> + <a href="{{configMC.developerMailTo}}">{{ 'EMAIL_ERROR' | translate }}</a> + </ion-col> + </ion-row> + </ion-grid> </ion-content> </ion-menu> <ion-router-outlet id="content1"></ion-router-outlet> diff --git a/mc_frontend/src/app/corpus.service.spec.ts b/mc_frontend/src/app/corpus.service.spec.ts index 5246cb2e4a9ae99473f5184a4e66dd6d5ce342f2..538e546ddb0467eb47054acfbf5db43df933a996 100644 --- a/mc_frontend/src/app/corpus.service.spec.ts +++ b/mc_frontend/src/app/corpus.service.spec.ts @@ -292,7 +292,8 @@ describe('CorpusService', () => { corpusService.initUpdateInfo().then(() => { updateInfoSpy.and.callThrough(); corpusService.storage.get(configMC.localStorageKeyUpdateInfo).then((jsonString: string) => { - expect(jsonString).toBeTruthy(); + const updateInfo: UpdateInfo = JSON.parse(jsonString) as UpdateInfo; + expect(updateInfo.corpora).toBe(1); const setSpy: Spy = spyOn(corpusService.storage, 'set').and.returnValue(Promise.resolve()); corpusService.initUpdateInfo().then(() => { expect(setSpy).toHaveBeenCalledTimes(0); diff --git a/mc_frontend/src/app/preview/preview.page.html b/mc_frontend/src/app/preview/preview.page.html index 6b464ab0f98be4437f8be609c799693162b7fab1..a17703f711480e9e23f3352972d755c803106731 100644 --- a/mc_frontend/src/app/preview/preview.page.html +++ b/mc_frontend/src/app/preview/preview.page.html @@ -35,7 +35,6 @@ </ion-row> <ion-row> <ion-col> - <!-- TODO: enable solution shuffling for H5P ? 
diff --git a/mc_frontend/src/app/show-text/show-text.page.spec.ts b/mc_frontend/src/app/show-text/show-text.page.spec.ts
index ca3d6cc076ee659f02aa972313d6bd05af4a0985..00c268284f85f71635d1a672ca15d123ecd6c8a8 100644
--- a/mc_frontend/src/app/show-text/show-text.page.spec.ts
+++ b/mc_frontend/src/app/show-text/show-text.page.spec.ts
@@ -72,6 +72,9 @@ describe('ShowTextPage', () => {
         showTextPage.corpusService.annisResponse.graph_data.nodes = [{}, {}];
         result = showTextPage.getWhiteSpace(0);
         expect(result.length).toBe(1);
+        showTextPage.corpusService.annisResponse.graph_data.nodes[1].annis_tok = 'test,';
+        result = showTextPage.getWhiteSpace(0);
+        expect(result.length).toBe(1);
         showTextPage.corpusService.annisResponse.graph_data.nodes[1].annis_tok = '.';
         result = showTextPage.getWhiteSpace(0);
         expect(result.length).toBe(0);
diff --git a/mc_frontend/src/app/show-text/show-text.page.ts b/mc_frontend/src/app/show-text/show-text.page.ts
index 036b9ac8f884910c62e3cbd46fc83705c368fd16..62ff3e51640aaed51e1567c6b4815c84324042f4 100644
--- a/mc_frontend/src/app/show-text/show-text.page.ts
+++ b/mc_frontend/src/app/show-text/show-text.page.ts
@@ -87,7 +87,7 @@ export class ShowTextPage implements OnInit {
     getWhiteSpace(index: number): string {
         if (this.corpusService.annisResponse.graph_data.nodes[index + 1]) {
             if (this.corpusService.annisResponse.graph_data.nodes[index + 1].annis_tok &&
-                this.corpusService.annisResponse.graph_data.nodes[index + 1].annis_tok
+                this.corpusService.annisResponse.graph_data.nodes[index + 1].annis_tok[0]
                     .search(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g) >= 0) {
                 return '';
             }
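The `getWhiteSpace` fix narrows the punctuation check to the token's first character: previously a token like `test,` matched the regex anywhere and lost its preceding whitespace; now only tokens that begin with punctuation (e.g. `.`) suppress it, which is what the extended spec asserts. A rough Python rendering of the fixed predicate (the frontend code itself is TypeScript; names here are illustrative):

```python
import re

# Whitespace before a token is suppressed only if the token *starts* with punctuation.
PUNCT = re.compile(r"[.,/#!$%^&*;:{}=\-_`~()]")

def get_white_space(next_tok: str) -> str:
    return "" if next_tok and PUNCT.search(next_tok[0]) else " "

assert get_white_space("test,") == " "  # punctuation inside the token: keep the space
assert get_white_space(".") == ""       # token begins with punctuation: no space
```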