Skip to content
Snippets Groups Projects
tests.py 73.7 KiB
Newer Older
        """ Retrieves the raw text for a corpus. """
        with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
            text: str = CorpusService.get_raw_text(Mocks.urn, True)
            self.assertEqual(len(text), 349)

    def test_get_solutions_by_index(self):
        """ Requests the solutions of the mock exercise with an empty index list and expects exactly one result. """
        no_indices: list = []
        selected: List[Solution] = TextService.get_solutions_by_index(Mocks.exercise, no_indices)
        self.assertEqual(len(selected), 1)

    def test_get_treebank_annotations(self):
        """ Retrieves annotations from a treebank.

        The JSON cache file belonging to the custom corpus at index 4 is deleted up front. The first lookup has to
        hand back the mocked annotations object itself. The second lookup runs while json.loads raises a ValueError;
        afterwards the CoNLL-U parser mock must have been called twice in total. """
        corpus_file_name: str = ntpath.basename(CustomCorpusService.custom_corpora[4].file_path)
        cache_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, corpus_file_name + ".json")
        if os.path.exists(cache_path):
            os.remove(cache_path)
        service_module = mcserver.app.services.customCorpusService
        with patch.object(service_module.conllu, "parse", return_value=Mocks.annotations) as parse_mock, \
                patch.object(CustomCorpusService, "get_treebank_sub_annotations", return_value=Mocks.annotations):
            first_result: List[TokenList] = CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
            self.assertIs(first_result, Mocks.annotations)
            with patch.object(service_module.json, "loads", side_effect=ValueError):
                # the return value is irrelevant here, only the number of parser calls is checked
                CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
                # clean up the cache file before the final assertion
                os.remove(cache_path)
                self.assertEqual(parse_mock.call_count, 2)

    def test_get_treebank_sub_annotations(self):
        """ Retrieves annotations for nested parts of a treebank.

        First, the mocked annotations are extended by two empty sentences (sent_id "2" and "3") and the range "@1-3"
        is requested, which has to yield three sentences. Second, the range ":1.1-1.2" is requested for the last
        custom corpus with an empty annotation list; afterwards that corpus has to own two text parts. """
        extra_sentences: List[TokenList] = [
            TokenList([], metadata=OrderedDict([("sent_id", sent_id)])) for sent_id in ("2", "3")]
        annotations: List[TokenList] = Mocks.annotations + extra_sentences
        ranged_urn: str = Mocks.urn + "@1-3"
        conll: List[TokenList] = CustomCorpusService.get_treebank_sub_annotations(
            ranged_urn, annotations, CustomCorpusService.custom_corpora[4])
        self.assertEqual(len(conll), 3)
        last_corpus: CustomCorpus = CustomCorpusService.custom_corpora[-1]
        # only the effect on the corpus' text parts is of interest, not the returned annotations
        CustomCorpusService.get_treebank_sub_annotations(last_corpus.corpus.source_urn + ":1.1-1.2", [], last_corpus)
        self.assertEqual(len(last_corpus.text_parts), 2)

    def test_get_udpipe(self):
        """Annotates a single text with UdPipe. Only containment of the expected string is checked, because the
        beginning of the CONLL contains the randomly generated temp file path and thus cannot be predicted exactly."""
        latin_text = "Caesar fortis est. Galli moriuntur."
        annotated = AnnotationService.get_udpipe(latin_text)
        self.assertIn(Mocks.udpipe_string, annotated)

    def test_init_custom_corpus(self):
        """Initializes the first custom corpus while the treebank annotations are mocked and expects a single
        text part on the resulting corpus."""
        first_corpus: CustomCorpus = CustomCorpusService.custom_corpora[0]
        with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
            initialized: CustomCorpus = CustomCorpusService.init_custom_corpus(first_corpus)
            self.assertEqual(len(initialized.text_parts), 1)

    def test_init_db_alembic(self):
        """In Docker, the alembic version is not initially written to the database, so we need to set it manually.
        The alembic table is dropped first (if present); after the initialization it has to exist."""

        def alembic_table_exists():
            # asks the engine's dialect whether the alembic table is currently present
            return db.engine.dialect.has_table(db.engine, Config.DATABASE_TABLE_ALEMBIC)

        if alembic_table_exists():
            db.engine.execute(f"DROP TABLE {Config.DATABASE_TABLE_ALEMBIC}")
        self.assertEqual(alembic_table_exists(), False)
        DatabaseService.init_db_alembic()
        self.assertEqual(alembic_table_exists(), True)

    def test_init_db_corpus(self):
        """Initializes the corpus table.

        A mock corpus carrying the source URN of the first custom corpus is added beforehand. After the
        initialization, the stored corpus with that URN has to carry the custom corpus' title. Finally, all Corpus
        and UpdateInfo rows are deleted from the session."""
        custom_corpus: CustomCorpus = CustomCorpusService.custom_corpora[0]
        stale_corpus: Corpus = Mocks.corpora[0]
        stale_corpus.source_urn = custom_corpus.corpus.source_urn
        McTestCase.add_corpus(stale_corpus)
        del stale_corpus
        DatabaseService.init_db_corpus()
        corpus_query = db.session.query(Corpus).filter_by(source_urn=custom_corpus.corpus.source_urn)
        stored_corpus: Corpus = corpus_query.first()
        self.assertEqual(stored_corpus.title, custom_corpus.corpus.title)
        for model in (Corpus, UpdateInfo):
            db.session.query(model).delete()

    def test_init_stop_words_latin(self):
        """Initializes the stop words list for Latin texts and caches it if necessary.
        Two initializations in a row must result in only one mocked GET request."""

        def remove_cache_file():
            # deletes the stop words file if it is present
            cache_file: str = Config.STOP_WORDS_LATIN_PATH
            if os.path.exists(cache_file):
                os.remove(cache_file)

        remove_cache_file()
        payload: Dict[str, List[str]] = {"a": ["b"]}
        response: MockResponse = MockResponse(json.dumps(payload))
        requests_module = mcserver.app.services.textService.requests
        with patch.object(requests_module, "get", return_value=response) as get_mock:
            TextService.init_stop_words_latin()
            self.assertEqual(len(TextService.stop_words_latin), 1)
            TextService.init_stop_words_latin()
            remove_cache_file()
            self.assertEqual(get_mock.call_count, 1)

    def test_load_text_list(self):
        """ Loads the text list for a new corpus.

        Three mocked CTS passages are served one after another, each paired with a URN and the number of text parts
        that has to come back. Finally, an HTTPError raised by the mocked retriever has to result in an empty list. """
        scenarios = [(Mocks.cts_passage_xml, Mocks.urn, 2),
                     (Mocks.cts_passage_xml_2_levels, Mocks.urn[:-8] + "-1.1", 1),
                     (Mocks.cts_passage_xml_1_level, Mocks.urn[:-10] + "-3", 3)]
        retriever = mcserver.app.services.corpusService.HttpCtsRetriever
        with patch.object(retriever, 'getPassage', return_value=Mocks.cts_passage_xml) as get_passage_mock:
            for passage_xml, urn, expected_count in scenarios:
                get_passage_mock.return_value = passage_xml
                text_parts: List[Tuple[str, str]] = CorpusService.load_text_list(urn)
                self.assertEqual(len(text_parts), expected_count)
            get_passage_mock.side_effect = HTTPError()
            self.assertEqual(CorpusService.load_text_list(Mocks.urn), [])

    def test_make_docx_file(self):
        """ Saves an exercise to a DOCX file (e.g. for later download).

        The file is written three times: with the mock exercise as it is, then with its type set to markWords and
        finally to matching. After each run, the exact file size is compared to a fixed expected value.
        The shared mock exercise is mutated along the way, so the reset of its type (to cloze) and the removal of the
        temporary file happen in a finally clause. That way a failing assertion cannot leak state into other tests. """
        file_path: str = os.path.join(Config.TMP_DIRECTORY, "make_docx_file.docx")
        solutions: List[Solution] = [Solution.from_dict(x) for x in json.loads(Mocks.exercise.solutions)]
        try:
            FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)
            self.assertEqual(os.path.getsize(file_path), 36611)
            Mocks.exercise.exercise_type = ExerciseType.markWords.value
            FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)
            self.assertEqual(os.path.getsize(file_path), 36599)
            Mocks.exercise.exercise_type = ExerciseType.matching.value
            FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)
            self.assertEqual(os.path.getsize(file_path), 36714)
        finally:
            # always undo the mutation of the shared mock, even if an assertion above failed
            Mocks.exercise.exercise_type = ExerciseType.cloze.value
            # the file may be missing if the very first write failed; do not mask that error with a second one
            if os.path.exists(file_path):
                os.remove(file_path)

    def test_make_tmp_file_from_exercise(self):
        """ Creates a temporary file from a given exercise, e.g. for downloading.
        This is done for the XML and the DOCX file type; each file has to exist and is removed right afterwards. """
        for file_type in (FileType.XML, FileType.DOCX):
            df: DownloadableFile = FileService.make_tmp_file_from_exercise(file_type, Mocks.exercise, [0])
            self.assertTrue(os.path.exists(df.file_path))
            os.remove(df.file_path)

    def test_make_tmp_file_from_html(self):
        """ Creates a temporary file from a given HTML string, e.g. for downloading.
        This is done for the PDF and the DOCX file type; each file has to exist and is removed right afterwards. """
        html: str = "<html lang='la'><p>test</p><span class='tok'><u>abc</u></span></html>"
        for file_type in (FileType.PDF, FileType.DOCX):
            df: DownloadableFile = FileService.make_tmp_file_from_html(Mocks.urn_custom, file_type, html)
            self.assertTrue(os.path.exists(df.file_path))
            os.remove(df.file_path)

    def test_map_graph_data(self):
        """Maps graph data to exercise data.

        A raw graph dictionary is rebuilt from the first node and the first link of the mocked exercise data. The
        mapped result has to match the mocked graph in its graph, multigraph and directed values as well as in its
        first node and first link."""
        expected_graph = Mocks.exercise_data.graph
        expected_node: NodeMC = expected_graph.nodes[0]
        expected_link: LinkMC = expected_graph.links[0]
        raw_node = {
            "id": expected_node.id,
            "annis::node_name": expected_node.annis_node_name,
            "annis::node_type": expected_node.annis_node_type,
            "annis::tok": expected_node.annis_tok,
            "annis::type": expected_node.annis_type,
            "udep::feats": expected_node.udep_feats,
            "udep::lemma": expected_node.udep_lemma,
            "udep::upostag": expected_node.udep_upostag,
            "udep::xpostag": expected_node.udep_xpostag,
        }
        raw_link = {
            "source": expected_link.source,
            "target": expected_link.target,
            "annis::component_name": expected_link.annis_component_name,
            "annis::component_type": expected_link.annis_component_type,
            "udep::deprel": expected_link.udep_deprel,
        }
        graph_data_raw: dict = {
            "directed": expected_graph.directed,
            "graph": expected_graph.graph,
            "multigraph": expected_graph.multigraph,
            "links": [raw_link],
            "nodes": [raw_node],
        }
        mapped: GraphData = AnnotationService.map_graph_data(graph_data_raw=graph_data_raw)
        self.assertEqual(mapped.graph, expected_graph.graph)
        self.assertEqual(mapped.multigraph, expected_graph.multigraph)
        self.assertEqual(mapped.directed, expected_graph.directed)
        self.assertEqual(mapped.nodes[0], expected_node)
        self.assertEqual(mapped.links[0], expected_link)

    def test_models(self):
        """ Tests various models and their specific methods.

        Covered are the equality checks of corpora, links and nodes, the string representations of Exercise and
        UpdateInfo, and the serialization of Choice and XapiStatement. Afterwards, all UpdateInfo rows are deleted
        and the mocked corpus and exercise are made transient. """
        first_corpus, second_corpus = Mocks.corpora[0], Mocks.corpora[1]
        self.assertFalse(first_corpus == second_corpus)
        self.assertFalse(first_corpus == "")
        self.assertTrue(repr(Mocks.exercise).startswith("<Exercise"))
        update_info: UpdateInfo = UpdateInfo.from_dict(resource_type=ResourceType.cts_data.name, created_time=1,
                                                       last_modified_time=1)
        self.assertTrue(repr(update_info).startswith("<UpdateInfo"))
        del update_info
        # links first, then nodes: different elements must be unequal, an element must equal itself
        for elements in (Mocks.graph_data.links, Mocks.graph_data.nodes):
            self.assertFalse(elements[0] == elements[1])
            self.assertTrue(elements[0] == elements[0])
        choice_dict: dict = {"id": "", "description": {"en-US": "desc"}}
        self.assertEqual(Choice(choice_dict).serialize(), choice_dict)
        statement_dict = json.loads(Mocks.xapi_json_string)["0"]
        xapi: XapiStatement = XapiStatement(statement_dict)
        serialized_keys = xapi.serialize().keys()
        self.assertEqual(len(serialized_keys), 5)
        db.session.query(UpdateInfo).delete()
        for mocked_model in (Mocks.corpora[0], Mocks.exercise):
            session.make_transient(mocked_model)

    def test_sort_nodes(self):
        """Sorts the nodes according to the ordering links. For a graph without any nodes and links, the very same
        GraphData object has to be returned."""
        empty_graph: GraphData = GraphData(nodes=[], links=[])
        sorted_graph: GraphData = AnnotationService.sort_nodes(empty_graph)
        self.assertIs(empty_graph, sorted_graph)

    def test_strip_name_spaces(self):
        """Removes all namespaces from an XML document for easier parsing, e.g. with XPath.

        While the patched hasattr reports False, the child's tag keeps its full length of 16 characters. Once it
        reports True, the tag is reduced to 5 characters."""
        root: etree._Element = etree.Element("{namespace}root")
        child: etree._Element = etree.Element("{namespace}child")
        root.append(child)
        with patch("mcserver.app.services.xmlService.hasattr", return_value=False) as has_attr_mock:
            for has_attr_result, expected_tag_length in ((False, 16), (True, 5)):
                has_attr_mock.return_value = has_attr_result
                XMLservice.strip_name_spaces(root)
                self.assertEqual(len(child.tag), expected_tag_length)

    def test_start_updater(self):
        """Starts an updater thread for the app of this test class and expects a Thread that is alive."""
        app = Mocks.app_dict[self.class_name].app
        updater: Thread = start_updater(app)
        self.assertIsInstance(updater, Thread)
        self.assertTrue(updater.is_alive())

    def test_update_exercises(self):
        """Deletes old exercises.

        Two exercises are added to the session: one with an empty solution list and one with a single default
        solution and a text complexity of 0. With the text complexity request and the corpus retrieval mocked,
        the update has to leave exactly one exercise, whose text complexity is 54.53. All exercises are deleted
        at the end."""
        without_solutions: Exercise = ExerciseMC.from_dict(
            last_access_time=datetime.utcnow().timestamp(), urn="urn", solutions="[]", eid="eid1")
        with_solution: Exercise = ExerciseMC.from_dict(
            last_access_time=datetime.utcnow().timestamp(), urn="urn",
            solutions=json.dumps([Solution().to_dict()]), text_complexity=0, eid="eid2")
        db.session.add_all([without_solutions, with_solution])

        complexity_response = MockResponse(Mocks.text_complexity_json_string)
        requests_module = mcserver.app.services.textComplexityService.requests
        with patch.object(requests_module, "post", return_value=complexity_response), \
                patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
            DatabaseService.update_exercises(False)
            remaining: List[Exercise] = db.session.query(Exercise).all()
            self.assertEqual(len(remaining), 1)
            self.assertEqual(remaining[0].text_complexity, 54.53)
        db.session.query(Exercise).delete()


if __name__ == '__main__':
    # collect the tests of all three test case classes in a single suite and run it with the text runner
    suite: unittest.TestSuite = unittest.TestSuite()
    for test_case_class in (McTestCase, CsmTestCase, CommonTestCase):
        suite.addTests(TestLoader().loadTestsFromTestCase(test_case_class))
    runner: unittest.TextTestRunner = unittest.TextTestRunner()
    runner.run(suite)
    # remove the graph database directory, if the test run left one behind
    graph_database_dir = Config.GRAPH_DATABASE_DIR
    if os.path.exists(graph_database_dir):
        shutil.rmtree(graph_database_dir)
    # delete the SQLITE database to have a clean start next time
    # NOTE(review): the slice presumably strips a 10-character "sqlite:///" prefix from the URI - confirm
    sqlite_file_path = TestingConfig.SQLALCHEMY_DATABASE_URI[10:]
    os.remove(sqlite_file_path)