Skip to content
Snippets Groups Projects
tests.py 73.7 KiB
Newer Older
  • Learn to ignore specific revisions
  •         """ Retrieves the raw text for a corpus. """
            with patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
                text: str = CorpusService.get_raw_text(Mocks.urn, True)
                self.assertEqual(len(text), 349)
    
        def test_get_solutions_by_index(self):
            """ Asks for the solutions of the mock exercise with an empty index list and expects exactly one. """
            selected: List[Solution] = TextService.get_solutions_by_index(Mocks.exercise, [])
            solution_count: int = len(selected)
            self.assertEqual(solution_count, 1)
    
        def test_get_treebank_annotations(self):
            """ Fetches treebank annotations with conllu.parse mocked, then once more with a failing json.loads. """
            treebank_file_name: str = ntpath.basename(CustomCorpusService.custom_corpora[4].file_path)
            cache_path: str = os.path.join(Config.TREEBANKS_CACHE_DIRECTORY, treebank_file_name + ".json")
            # remove any leftover cache file before the first call
            if os.path.exists(cache_path):
                os.remove(cache_path)
            service_module = mcserver.app.services.customCorpusService
            with patch.object(service_module.conllu, "parse", return_value=Mocks.annotations) as parse_mock, \
                    patch.object(CustomCorpusService, "get_treebank_sub_annotations", return_value=Mocks.annotations):
                first_result: List[TokenList] = CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
                self.assertIs(first_result, Mocks.annotations)
                # with json.loads raising ValueError, the parser mock is expected to be called a second time
                with patch.object(service_module.json, "loads", side_effect=ValueError):
                    CustomCorpusService.get_treebank_annotations(Mocks.urn_custom)
                    os.remove(cache_path)
                    self.assertEqual(parse_mock.call_count, 2)
    
        def test_get_treebank_sub_annotations(self):
            """ Selects nested parts of a treebank, first from given annotations, then for a custom corpus' URN. """
            extra_sentences: List[TokenList] = [TokenList([], metadata=OrderedDict([("sent_id", sent_id)]))
                                                for sent_id in ("2", "3")]
            all_sentences: List[TokenList] = Mocks.annotations + extra_sentences
            range_urn: str = Mocks.urn + "@1-3"
            selection: List[TokenList] = CustomCorpusService.get_treebank_sub_annotations(
                range_urn, all_sentences, CustomCorpusService.custom_corpora[4])
            self.assertEqual(len(selection), 3)
            last_corpus: CustomCorpus = CustomCorpusService.custom_corpora[-1]
            nested_urn: str = last_corpus.corpus.source_urn + ":1.1-1.2"
            # no annotations are passed here; afterwards the corpus is expected to hold two text parts
            CustomCorpusService.get_treebank_sub_annotations(nested_urn, [], last_corpus)
            self.assertEqual(len(last_corpus.text_parts), 2)
    
        def test_get_udpipe(self):
            """Runs UdPipe on a short Latin text. Only containment is checked, because the beginning of the CONLL
            contains the randomly generated temp file path and thus cannot be predicted exactly."""
            latin_text = "Caesar fortis est. Galli moriuntur."
            udpipe_output = AnnotationService.get_udpipe(latin_text)
            self.assertIn(Mocks.udpipe_string, udpipe_output)
    
        def test_init_custom_corpus(self):
            """Initializes the first custom corpus with mocked treebank annotations and expects one text part."""
            first_corpus: CustomCorpus = CustomCorpusService.custom_corpora[0]
            with patch.object(CustomCorpusService, "get_treebank_annotations", return_value=Mocks.annotations):
                initialized: CustomCorpus = CustomCorpusService.init_custom_corpus(first_corpus)
                text_part_count: int = len(initialized.text_parts)
                self.assertEqual(text_part_count, 1)
    
        def test_init_db_alembic(self):
            """In Docker, the alembic version is not initially written to the database, so we need to set it manually.

            Drops the alembic table if it is present, then checks that the table exists again after calling
            DatabaseService.init_db_alembic().
            """
            table_name: str = Config.DATABASE_TABLE_ALEMBIC

            def table_exists():
                # single place for the existence check that is needed before and after the initialization
                return db.engine.dialect.has_table(db.engine, table_name)

            if table_exists():
                db.engine.execute(f"DROP TABLE {table_name}")
            self.assertFalse(table_exists())
            DatabaseService.init_db_alembic()
            self.assertTrue(table_exists())
    
        def test_init_db_corpus(self):
            """Initializes the corpus table.

            Stores a corpus that shares its source URN with the first custom corpus, runs the initialization and
            expects the stored corpus to carry the custom corpus' title afterwards.
            """
            cc: CustomCorpus = CustomCorpusService.custom_corpora[0]
            old_corpus: Corpus = Mocks.corpora[0]
            old_corpus.source_urn = cc.corpus.source_urn
            McTestCase.add_corpus(old_corpus)
            del old_corpus
            try:
                DatabaseService.init_db_corpus()
                corpus: Corpus = db.session.query(Corpus).filter_by(source_urn=cc.corpus.source_urn).first()
                self.assertEqual(corpus.title, cc.corpus.title)
            finally:
                # empty the tables even if the check above fails, so that later tests do not see leftover rows
                db.session.query(Corpus).delete()
                db.session.query(UpdateInfo).delete()
    
    
        def test_init_stop_words_latin(self):
            """Initializes the Latin stop words twice and expects that only one HTTP request was made."""

            def remove_cache_file():
                cache_file: str = Config.STOP_WORDS_LATIN_PATH
                if os.path.exists(cache_file):
                    os.remove(cache_file)

            remove_cache_file()
            fake_stop_words: Dict[str, List[str]] = {"a": ["b"]}
            fake_response: MockResponse = MockResponse(json.dumps(fake_stop_words))
            requests_module = mcserver.app.services.textService.requests
            with patch.object(requests_module, "get", return_value=fake_response) as get_mock:
                TextService.init_stop_words_latin()
                self.assertEqual(len(TextService.stop_words_latin), 1)
                # second initialization: the request mock is expected to stay at a single call
                TextService.init_stop_words_latin()
                remove_cache_file()
                self.assertEqual(get_mock.call_count, 1)
    
        def test_load_text_list(self):
            """ Loads the text list for a new corpus.

            Feeds three mocked CTS passages to CorpusService.load_text_list and checks the number of returned text
            parts for each; finally expects an empty list when the passage retrieval raises an HTTPError.
            """
            # (URN to load, mocked passage XML, expected number of text parts)
            cases: list = [
                (Mocks.urn, Mocks.cts_passage_xml, 2),
                (Mocks.urn[:-8] + "-1.1", Mocks.cts_passage_xml_2_levels, 1),
                (Mocks.urn[:-10] + "-3", Mocks.cts_passage_xml_1_level, 3),
            ]
            text_parts: List[Tuple[str, str]]
            with patch.object(mcserver.app.services.corpusService.HttpCtsRetriever, 'getPassage',
                              return_value=Mocks.cts_passage_xml) as get_passage_mock:
                for urn, passage_xml, expected_count in cases:
                    get_passage_mock.return_value = passage_xml
                    text_parts = CorpusService.load_text_list(urn)
                    self.assertEqual(len(text_parts), expected_count)
                # a failing retrieval is expected to yield an empty list
                get_passage_mock.side_effect = HTTPError()
                text_parts = CorpusService.load_text_list(Mocks.urn)
                self.assertEqual(text_parts, [])
    
        def test_make_docx_file(self):
            """ Saves an exercise to a DOCX file (e.g. for later download).

            Writes the mock exercise once with its current exercise type and once each as markWords and matching,
            comparing the resulting file size against a fixed value every time.
            """
            file_path: str = os.path.join(Config.TMP_DIRECTORY, "make_docx_file.docx")
            solutions: List[Solution] = [Solution.from_dict(x) for x in json.loads(Mocks.exercise.solutions)]
            try:
                FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)
                self.assertEqual(os.path.getsize(file_path), 36611)
                for exercise_type, expected_size in ((ExerciseType.markWords, 36599), (ExerciseType.matching, 36714)):
                    Mocks.exercise.exercise_type = exercise_type.value
                    FileService.make_docx_file(Mocks.exercise, file_path, Mocks.annotations, FileType.DOCX, solutions)
                    self.assertEqual(os.path.getsize(file_path), expected_size)
            finally:
                # Mocks.exercise is shared between tests: reset its type and remove the file even on failure
                Mocks.exercise.exercise_type = ExerciseType.cloze.value
                if os.path.exists(file_path):
                    os.remove(file_path)
    
        def test_make_tmp_file_from_exercise(self):
            """ Creates a temporary file from a given exercise, e.g. for downloading.

            Runs once for XML and once for DOCX; each time the file has to exist and is removed afterwards.
            """
            df: DownloadableFile
            for file_type in (FileType.XML, FileType.DOCX):
                df = FileService.make_tmp_file_from_exercise(file_type, Mocks.exercise, [0])
                self.assertTrue(os.path.exists(df.file_path))
                os.remove(df.file_path)
    
        def test_make_tmp_file_from_html(self):
            """ Creates a temporary file from a given HTML string, e.g. for downloading.

            Runs once for PDF and once for DOCX; each time the file has to exist and is removed afterwards.
            """
            html: str = "<html lang='la'><p>test</p><span class='tok'><u>abc</u></span></html>"
            df: DownloadableFile
            for file_type in (FileType.PDF, FileType.DOCX):
                df = FileService.make_tmp_file_from_html(Mocks.urn_custom, file_type, html)
                self.assertTrue(os.path.exists(df.file_path))
                os.remove(df.file_path)
    
        def test_map_graph_data(self):
            """Builds raw graph data from the first node and link of the mock exercise data and maps it back."""
            expected: ExerciseData = Mocks.exercise_data
            expected_graph = expected.graph
            first_node: NodeMC = expected_graph.nodes[0]
            first_link: LinkMC = expected_graph.links[0]
            raw_node: dict = {
                "id": first_node.id,
                "annis::node_name": first_node.annis_node_name,
                "annis::node_type": first_node.annis_node_type,
                "annis::tok": first_node.annis_tok,
                "annis::type": first_node.annis_type,
                "udep::feats": first_node.udep_feats,
                "udep::lemma": first_node.udep_lemma,
                "udep::upostag": first_node.udep_upostag,
                "udep::xpostag": first_node.udep_xpostag,
            }
            raw_link: dict = {
                "source": first_link.source,
                "target": first_link.target,
                "annis::component_name": first_link.annis_component_name,
                "annis::component_type": first_link.annis_component_type,
                "udep::deprel": first_link.udep_deprel,
            }
            raw_graph: dict = {
                "directed": expected_graph.directed,
                "graph": expected_graph.graph,
                "multigraph": expected_graph.multigraph,
                "links": [raw_link],
                "nodes": [raw_node],
            }
            mapped: GraphData = AnnotationService.map_graph_data(graph_data_raw=raw_graph)
            # graph-level attributes first, then the single node and link
            for attribute in ("graph", "multigraph", "directed"):
                self.assertEqual(getattr(mapped, attribute), getattr(expected_graph, attribute))
            self.assertEqual(mapped.nodes[0], first_node)
            self.assertEqual(mapped.links[0], first_link)
    
        def test_models(self):
            """ Checks equality, string representation and serialization of several models. """
            first_corpus, second_corpus = Mocks.corpora[0], Mocks.corpora[1]
            # "==" is used on purpose (instead of assertNotEqual) so that the models' equality operator is called
            self.assertFalse(first_corpus == second_corpus)
            self.assertFalse(first_corpus == "")
            self.assertTrue(repr(Mocks.exercise).startswith("<Exercise"))
            update_info: UpdateInfo = UpdateInfo.from_dict(
                resource_type=ResourceType.cts_data.name, created_time=1, last_modified_time=1)
            self.assertTrue(repr(update_info).startswith("<UpdateInfo"))
            del update_info
            links, nodes = Mocks.graph_data.links, Mocks.graph_data.nodes
            self.assertFalse(links[0] == links[1])
            self.assertTrue(links[0] == links[0])
            self.assertFalse(nodes[0] == nodes[1])
            self.assertTrue(nodes[0] == nodes[0])
            raw_choice: dict = {"id": "", "description": {"en-US": "desc"}}
            self.assertEqual(Choice(raw_choice).serialize(), raw_choice)
            statement: XapiStatement = XapiStatement(json.loads(Mocks.xapi_json_string)["0"])
            serialized_keys = statement.serialize().keys()
            self.assertEqual(len(serialized_keys), 5)
            db.session.query(UpdateInfo).delete()
            for model in (Mocks.corpora[0], Mocks.exercise):
                session.make_transient(model)
    
        def test_sort_nodes(self):
            """Sorts a graph without nodes and links and expects to get the very same object back."""
            empty_graph: GraphData = GraphData(nodes=[], links=[])
            sorted_graph: GraphData = AnnotationService.sort_nodes(empty_graph)
            self.assertIs(empty_graph, sorted_graph)
    
        def test_strip_name_spaces(self):
            """Strips the namespaces from a small XML tree, first with the patched hasattr returning False, then True."""
            root: etree._Element = etree.Element("{namespace}root")
            child: etree._Element = etree.Element("{namespace}child")
            root.append(child)
            with patch("mcserver.app.services.xmlService.hasattr", return_value=False) as has_attr_mock:
                XMLservice.strip_name_spaces(root)
                # 16 characters is the length of the still qualified "{namespace}child"
                self.assertEqual(len(child.tag), 16)
                has_attr_mock.return_value = True
                XMLservice.strip_name_spaces(root)
                # 5 characters is the length of the bare "child"
                self.assertEqual(len(child.tag), 5)
    
        def test_start_updater(self):
            """Starts the updater for this test class' app and expects a thread that is alive."""
            app = Mocks.app_dict[self.class_name].app
            updater_thread: Thread = start_updater(app)
            self.assertIsInstance(updater_thread, Thread)
            self.assertTrue(updater_thread.is_alive())
    
        def test_update_exercises(self):
            """Adds two exercises, runs the exercise update and expects a single exercise to remain."""
            exercise_without_solutions = ExerciseMC.from_dict(
                last_access_time=datetime.utcnow().timestamp(), urn="urn", solutions="[]", eid="eid1")
            exercise_with_solution = ExerciseMC.from_dict(
                last_access_time=datetime.utcnow().timestamp(), urn="urn",
                solutions=json.dumps([Solution().to_dict()]), text_complexity=0, eid="eid2")
            db.session.add_all([exercise_without_solutions, exercise_with_solution])
            requests_module = mcserver.app.services.textComplexityService.requests
            complexity_response = MockResponse(Mocks.text_complexity_json_string)
            with patch.object(requests_module, "post", return_value=complexity_response), \
                    patch.object(CorpusService, "get_corpus", return_value=Mocks.annis_response):
                DatabaseService.update_exercises(False)
                remaining: List[Exercise] = db.session.query(Exercise).all()
                self.assertEqual(len(remaining), 1)
                self.assertEqual(remaining[0].text_complexity, 54.53)
            db.session.query(Exercise).delete()
    
    
    
    if __name__ == '__main__':
        # Script entry point: runs the three test cases, removes the artifacts they leave on disk and
        # reports the outcome through the process exit code.
        import sys

        runner: unittest.TextTestRunner = unittest.TextTestRunner()
        suite: unittest.TestSuite = unittest.TestSuite()
        loader: TestLoader = TestLoader()
        for test_case in (McTestCase, CsmTestCase, CommonTestCase):
            suite.addTests(loader.loadTestsFromTestCase(test_case))
        result = runner.run(suite)

        if os.path.exists(Config.GRAPH_DATABASE_DIR):
            shutil.rmtree(Config.GRAPH_DATABASE_DIR)

        # delete the SQLITE database to have a clean start next time
        # NOTE(review): the slice presumably strips a 10-character "sqlite:///" prefix from the URI - confirm
        database_path: str = TestingConfig.SQLALCHEMY_DATABASE_URI[10:]
        if os.path.exists(database_path):
            os.remove(database_path)

        # a failing suite must not look like a success to the caller (e.g. a CI job)
        sys.exit(0 if result.wasSuccessful() else 1)