diff --git a/spacy_basics/spacy-basics.ipynb b/spacy_basics/spacy-basics.ipynb index dd235819d266e429911b1291929274564a7ceb19..15e7d49e90581d40590075e6b4994964f510ff7a 100644 --- a/spacy_basics/spacy-basics.ipynb +++ b/spacy_basics/spacy-basics.ipynb @@ -79,11 +79,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "cc48ddd5-5b57-4e15-b594-066fa49c3fce", "metadata": {}, "outputs": [], "source": [ + "# Please uncomment the following line to download the model\n", "# !python -m spacy download de_core_news_md" ] }, @@ -104,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "1f03bc33-1447-4e77-a1e8-e753abdc845b", "metadata": {}, "outputs": [], @@ -114,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "a03912ad-03a1-40d9-adca-fdcfe3f4d7bd", "metadata": {}, "outputs": [ @@ -122,7 +123,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "3.5.3\n" + "3.6.1\n" ] } ], @@ -150,19 +151,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "id": "288afd5e-2aac-4024-9e14-92c08519d720", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\nitra\\anaconda3\\envs\\spacy_3_6_env\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "nlp = spacy.load('de_core_news_md') \n", "\n", @@ -170,6 +162,14 @@ "# nlp = spacy.load('de_core_news_md', disable=['parser', 'tagger'])" ] }, + { + "cell_type": "markdown", + "id": "d59046df-92bf-4488-9560-fa08560063ba", + "metadata": {}, + "source": [ + "Um das Sprachmodell aus einem bestimmten Pfad zu laden, kann folgender Code genutzt werden." 
+ ] + }, { "cell_type": "markdown", "id": "fc8d9510", @@ -182,18 +182,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "17e867a4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/opt/conda/lib/python3.10/site-packages/de_core_news_md/de_core_news_md-3.6.0')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# check path\n", - "# nlp.path" + "nlp.path" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "id": "e530ed3c", "metadata": {}, "outputs": [ @@ -202,14 +213,14 @@ "text/plain": [ "{'lang': 'de',\n", " 'name': 'core_news_md',\n", - " 'version': '3.5.0',\n", + " 'version': '3.6.0',\n", " 'description': 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.',\n", " 'author': 'Explosion',\n", " 'email': 'contact@explosion.ai',\n", " 'url': 'https://explosion.ai',\n", " 'license': 'MIT',\n", - " 'spacy_version': '>=3.5.0,<3.6.0',\n", - " 'spacy_git_version': '9e0322de1',\n", + " 'spacy_version': '>=3.6.0,<3.7.0',\n", + " 'spacy_git_version': 'cb4fdc83e',\n", " 'vectors': {'width': 300,\n", " 'vectors': 20000,\n", " 'keys': 500000,\n", @@ -1086,7 +1097,7 @@ " 'og': {'p': 0.33333333330000003, 'r': 0.1428571429, 'f': 0.2},\n", " 'adc': {'p': 1.0, 'r': 1.0, 'f': 1.0}},\n", " 'lemma_acc': 0.9769605349,\n", - " 'speed': 11185.9924982716,\n", + " 'speed': 11831.800084006,\n", " 'ents_p': 0.8439391945,\n", " 'ents_r': 0.8342856523000001,\n", " 'ents_f': 0.8390846587,\n", @@ -1117,7 +1128,7 @@ " 'requirements': []}" ] }, - "execution_count": 5, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1129,7 +1140,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "4c93123b", "metadata": {}, "outputs": [ @@ -1139,7 +1150,7 @@ "text": [ "core_news_md\n", "de\n", - 
"3.5.0\n" + "3.6.0\n" ] } ], @@ -1152,7 +1163,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "id": "5a25c88a", "metadata": {}, "outputs": [ @@ -1162,7 +1173,7 @@ "'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'" ] }, - "execution_count": 7, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1173,7 +1184,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "id": "8922d4ca", "metadata": {}, "outputs": [ @@ -1183,7 +1194,7 @@ "['LOC', 'MISC', 'ORG', 'PER']" ] }, - "execution_count": 8, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1203,7 +1214,8 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, + "id": "3dd71e5f", "metadata": {}, "outputs": [ { @@ -1212,7 +1224,7 @@ "'Companies, agencies, institutions, etc.'" ] }, - "execution_count": 9, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1224,10 +1236,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "36125b0b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LOC: Non-GPE locations, mountain ranges, bodies of water\n", + "\n", + "###\n", + "\n", + "MISC: Miscellaneous entities, e.g. 
events, nationalities, products or works of art\n", + "\n", + "###\n", + "\n", + "ORG: Companies, agencies, institutions, etc.\n", + "\n", + "###\n", + "\n", + "PER: Named person or family.\n", + "\n", + "###\n", + "\n" + ] + } + ], "source": [ "for ent in nlp.meta['labels']['ner']:\n", " print(f'{ent}: {spacy.explain(ent)}')\n", @@ -1236,7 +1271,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 19, "id": "982de4f8", "metadata": {}, "outputs": [ @@ -1250,7 +1285,7 @@ " 'mode': 'default'}" ] }, - "execution_count": 10, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1262,26 +1297,26 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 20, "id": "5b78b125-838b-403c-a3ee-f79d65401167", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x260d0351d30>),\n", - " ('tagger', <spacy.pipeline.tagger.Tagger at 0x260d1a1cb30>),\n", + "[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f368fd7bfa0>),\n", + " ('tagger', <spacy.pipeline.tagger.Tagger at 0x7f369d45ee60>),\n", " ('morphologizer',\n", - " <spacy.pipeline.morphologizer.Morphologizer at 0x260d1a1d610>),\n", - " ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x260d18b76f0>),\n", + " <spacy.pipeline.morphologizer.Morphologizer at 0x7f369d45ef20>),\n", + " ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x7f333a6af5a0>),\n", " ('lemmatizer',\n", - " <spacy.pipeline.edit_tree_lemmatizer.EditTreeLemmatizer at 0x260d1a1c830>),\n", + " <spacy.pipeline.edit_tree_lemmatizer.EditTreeLemmatizer at 0x7f369dcbca00>),\n", " ('attribute_ruler',\n", - " <spacy.pipeline.attributeruler.AttributeRuler at 0x260d1a86d10>),\n", - " ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x260d18b78b0>)]" + " <spacy.pipeline.attributeruler.AttributeRuler at 0x7f368fc2b6c0>),\n", + " ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f368fd52650>)]" ] }, - "execution_count": 11, + 
"execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1306,14 +1341,14 @@ "id": "f71dae96-0027-4923-b745-0ec668a2c916", "metadata": {}, "source": [ - "entnommen von:\n", + "Der Text aus der Datei `example-news-text.txt` ist entnommen von:\n", "\n", - "https://www.zdf.de/nachrichten/politik/bergkarabach-armenien-aserbaidschan-diplomatie-100.html\n" + "https://www.zdf.de/nachrichten/politik/bergkarabach-armenien-aserbaidschan-diplomatie-100.html" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 21, "id": "4a9e983e", "metadata": {}, "outputs": [], @@ -1324,7 +1359,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 24, "id": "95a4dbd3-2245-418a-af71-d3a10abccf03", "metadata": {}, "outputs": [ @@ -1338,19 +1373,12 @@ "Datum:\n", "05.10.2023 14:29 Uhr\n", "\n", - "Die Welt konnte oder wollte nicht helfen, als hunderttausend Armenier aus Bergkarabach vertrieben wurden. Der Konflikt ist nicht vorbei. Doch die Diplomatie hat keine Lösungen.\n", - "Ein Blick auf Khankendi in Aserbaidschan, das den Armeniern auch als Stepanakert, Karabach, bekannt ist.\n", - "Der Konflikt um Bergkarabach ist längst nicht vorbei.\n", - "Quelle: dpa\n", - "\n", - "Die Armenier*innen aus Bergkarabach, die man in diesen Tagen in Eriwan, der Hauptstadt Armeniens antrifft, sind erschöpft, traurig. Aber vor allem wütend. Wahlweise auf die armenische Regierung, auf Aserbaidschan, auf Russland, die UN oder die EU. Denn die Enttäuschungen häufen sich, von allen Seiten.\n", - "\n", - "Eigentlich sollten sich an diesem Donnerstag der armenische Premier Nikol Paschinjan und der aserbaidschanische Präsident Ilham Alijew im spanischen Granada treffen. Es wäre nach dem Angriff Aserbaidschans auf Bergk\n" + "Die Welt konnte oder wollte nicht helfen, als hunderttausend Armenier aus Bergkarabach vertrieben wurden. Der Konflikt ist nicht vorbei. 
Doch die Diplomatie hat keine L\n" ] } ], "source": [ - "print(text[:1000])" + "print(text[:300])" ] }, { @@ -1370,7 +1398,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 25, "id": "5bc52de7-d8f0-436c-9577-694b902dfc27", "metadata": {}, "outputs": [], @@ -1391,7 +1419,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 26, "id": "00b466fb-24eb-49be-8655-ad628a819e73", "metadata": { "collapsed": true, @@ -1626,7 +1654,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 27, "id": "39380514-28b4-4a30-969f-2e85e90b7682", "metadata": {}, "outputs": [ @@ -1636,7 +1664,7 @@ "spacy.tokens.doc.Doc" ] }, - "execution_count": 13, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1648,7 +1676,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 28, "id": "29bbb8a8-0dbc-44e2-bfc2-f99bc8948ab7", "metadata": {}, "outputs": [ @@ -1658,7 +1686,7 @@ "'de'" ] }, - "execution_count": 14, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1670,7 +1698,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 29, "id": "4da0aff1", "metadata": {}, "outputs": [ @@ -1697,9 +1725,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 30, "id": "062a607d", - "metadata": {}, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, "outputs": [ { "name": "stdout", @@ -1934,7 +1967,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 31, "id": "840398b6-2874-459d-b8eb-c6b751029040", "metadata": {}, "outputs": [ @@ -1961,7 +1994,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 33, "id": "6c3443fa", "metadata": {}, "outputs": [ @@ -2106,7 +2139,8 @@ "Kaukasusregion LOC\n", "Zentralasien LOC\n", "https://www.zdf.de/nachrichten/politik/bergkarabach-armenien-aserbaidschan-diplomatie-100.html\n", - " MISC\n" + " MISC\n", + 
"<class 'spacy.tokens.span.Span'>\n" ] } ], @@ -2115,7 +2149,7 @@ "for ent in doc.ents:\n", " print(ent.text, ent.label_)\n", "\n", - " print(type(ent))" + "print(type(ent))" ] }, { @@ -2128,7 +2162,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 34, "id": "122716f1-fc33-44cd-ba2e-dc8561e3d054", "metadata": {}, "outputs": [ @@ -2168,7 +2202,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 35, "id": "d49cb59a-c9c6-4038-8aec-c673f98b6a3f", "metadata": { "slideshow": { @@ -2348,7 +2382,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 36, "id": "9b223352-b600-43a0-a33f-199cd141321e", "metadata": {}, "outputs": [ @@ -2357,7 +2391,7 @@ "output_type": "stream", "text": [ "20\n", - "{'Sebastian Ehm', 'Emmanuel Maron', 'Olaf Scholz', 'Ursula von der Leyen', 'Moral', 'Röthig', 'Wladimir Putin', 'Armin Coerper', 'Recep Tayyip Erdogan', 'Nina Niebergall', 'Charles Michel', 'Erdogan', 'Putin', 'Südkaukasus-Experte Röthig', 'Nikol Paschinjan', 'Paschinjan', 'Alijew', 'Südkaukasus-Experte\\n\\nAserbaidschan', 'Marcel Röthig', 'Ilham Alijew'}\n" + "{'Südkaukasus-Experte\\n\\nAserbaidschan', 'Charles Michel', 'Wladimir Putin', 'Armin Coerper', 'Erdogan', 'Moral', 'Recep Tayyip Erdogan', 'Emmanuel Maron', 'Nina Niebergall', 'Sebastian Ehm', 'Ursula von der Leyen', 'Ilham Alijew', 'Nikol Paschinjan', 'Marcel Röthig', 'Paschinjan', 'Olaf Scholz', 'Alijew', 'Röthig', 'Südkaukasus-Experte Röthig', 'Putin'}\n" ] } ], @@ -2370,7 +2404,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 38, "id": "80be4b0e", "metadata": {}, "outputs": [ @@ -2426,7 +2460,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 39, "id": "6de0596d-eb6a-4786-bada-69522913771c", "metadata": {}, "outputs": [], @@ -2436,7 +2470,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 40, "id": "a54f0561-9327-49ce-9d21-416a5e7dc094", "metadata": { "slideshow": { @@ 
-2448,7 +2482,7 @@ { "data": { "text/html": [ - "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\"></br>Konflikt um \n", + "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">Konflikt um \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Kaukasus-Region\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", @@ -2458,7 +2492,7 @@ " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " versagt</br>von \n", + " versagt<br>von \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Nina Niebergall\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2468,12 +2502,12 @@ " Eriwan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - "</br></br>Datum:</br>\n", + "<br><br>Datum:<br>\n", "<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " 05.10.2023\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", "</mark>\n", - " 14:29 Uhr</br></br>Die Welt konnte oder wollte nicht helfen, als hunderttausend \n", + " 14:29 Uhr<br><br>Die Welt konnte oder wollte nicht helfen, als hunderttausend \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 
0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Armenier\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2483,7 +2517,7 @@ " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " vertrieben wurden. Der Konflikt ist nicht vorbei. Doch die Diplomatie hat keine Lösungen.</br>Ein Blick auf \n", + " vertrieben wurden. Der Konflikt ist nicht vorbei. Doch die Diplomatie hat keine Lösungen.<br>Ein Blick auf \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Khankendi\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2508,12 +2542,12 @@ " Karabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ", bekannt ist.</br>Der Konflikt um \n", + ", bekannt ist.<br>Der Konflikt um \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " ist längst nicht vorbei.</br>Quelle: dpa</br></br>Die Armenier*innen aus \n", + " ist längst nicht vorbei.<br>Quelle: dpa<br><br>Die Armenier*innen aus \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; 
border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2548,7 +2582,7 @@ " EU\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", "</mark>\n", - ". Denn die Enttäuschungen häufen sich, von allen Seiten.</br></br>Eigentlich sollten sich an diesem Donnerstag der armenische Premier \n", + ". Denn die Enttäuschungen häufen sich, von allen Seiten.<br><br>Eigentlich sollten sich an diesem Donnerstag der armenische Premier \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Nikol Paschinjan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2588,7 +2622,7 @@ " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " zumindest eine Rückkehr an den Verhandlungstisch gewesen. Womöglich sogar die Chance auf einen fragilen Frieden.</br>\n", + " zumindest eine Rückkehr an den Verhandlungstisch gewesen. 
Womöglich sogar die Chance auf einen fragilen Frieden.<br>\n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Aserbaidschan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2598,7 +2632,7 @@ " Granada\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " ab</br></br>Doch \n", + " ab<br><br>Doch \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Alijew\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2633,7 +2667,7 @@ " Charles Michel\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - " hätten teilnehmen sollen.</br></br>Mehr als 100.000 \n", + " hätten teilnehmen sollen.<br><br>Mehr als 100.000 \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Armenier\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2643,7 +2677,7 @@ " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " geflohen.</br></br>\n", + " geflohen.<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Alijew\n", " <span 
style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2673,7 +2707,7 @@ " Armenien\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " seine Verteidigung sicherstellen könne.</br>Worum es bei Verhandlungen gehen könnte</br></br>\n", + " seine Verteidigung sicherstellen könne.<br>Worum es bei Verhandlungen gehen könnte<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Marcel Röthig\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2698,7 +2732,7 @@ " Röthig\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - ".</br></br>Aber ich bin realistisch. Ich kann mir nicht vorstellen, dass man jetzt noch von einem gerechten \n", + ".<br><br>Aber ich bin realistisch. Ich kann mir nicht vorstellen, dass man jetzt noch von einem gerechten \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Frieden\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">MISC</span>\n", @@ -2708,7 +2742,7 @@ " Baku\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " den Weg der Diplomatie einmal verlassen hat. </br></br>\n", + " den Weg der Diplomatie einmal verlassen hat. 
<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Marcel Röthig\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2760,7 +2794,7 @@ " Aserbaidschan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " sich mit Waffengewalt erkämpfen müsste.</br></br>Es wäre eine Eskalation, die einen anderen geopolitischen Player auf den Plan rufen könnte: Den \n", + " sich mit Waffengewalt erkämpfen müsste.<br><br>Es wäre eine Eskalation, die einen anderen geopolitischen Player auf den Plan rufen könnte: Den \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Iran\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2790,7 +2824,7 @@ " Teheran\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " eine rote Linie gezogen.</br></br>\n", + " eine rote Linie gezogen.<br><br>\n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Armenien\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2815,12 +2849,12 @@ " Armin Coerper\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - ".</br></br>\n", + ".<br><br>\n", "<mark 
class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " hat kaum noch Einfluss</br></br>Die Diplomatie wird also durchaus weiter gebraucht. Aber wer hat noch Einfluss in der Region, wer vertritt armenische Interessen? \n", + " hat kaum noch Einfluss<br><br>Die Diplomatie wird also durchaus weiter gebraucht. Aber wer hat noch Einfluss in der Region, wer vertritt armenische Interessen? \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2830,7 +2864,7 @@ " Armeniens\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ".</br></br>\n", + ".<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Wladimir Putin\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2860,7 +2894,7 @@ " Moskau\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ", das längst nicht mehr als verlässlicher Partner wahrgenommen wird.</br></br>\n", + ", das längst nicht mehr als verlässlicher Partner wahrgenommen wird.<br><br>\n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; 
border-radius: 0.35em;\">\n", " Armenien\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2875,7 +2909,7 @@ " Internationalen Gerichtshof\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", "</mark>\n", - ":</br></br>Hunderttausend Armenier*innen sind aus \n", + ":<br><br>Hunderttausend Armenier*innen sind aus \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2900,7 +2934,7 @@ " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ".</br></br>\n", + ".<br><br>\n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2910,7 +2944,7 @@ " Bergkarabach\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " aus Sicht einiger Armenier*innen aufgegeben hat.</br></br> Die \n", + " aus Sicht einiger Armenier*innen aufgegeben hat.<br><br> Die \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russen\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: 
middle; margin-left: 0.5rem\">MISC</span>\n", @@ -2935,7 +2969,7 @@ " Moskauer Orbit\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " führen.</br></br>Deal zwischen \n", + " führen.<br><br>Deal zwischen \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Putin\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2945,7 +2979,7 @@ " Erdogan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - "?</br></br>Expert*innen gehen davon aus, dass es einen Deal zwischen \n", + "?<br><br>Expert*innen gehen davon aus, dass es einen Deal zwischen \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Putin\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -2970,7 +3004,7 @@ " Marcel Röthig\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - " hält das für wahrscheinlich.</br></br> In den vergangenen Jahren haben \n", + " hält das für wahrscheinlich.<br><br> In den vergangenen Jahren haben \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -2995,7 +3029,7 @@ " 
Ukraine\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ".</br></br>Es ist nicht auszuschließen, dass \n", + ".<br><br>Es ist nicht auszuschließen, dass \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -3020,7 +3054,7 @@ " Armenien\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ". Das kleine Land steht sehr allein da.</br></br>Fast zwei Wochen nach der Eroberung durch \n", + ". Das kleine Land steht sehr allein da.<br><br>Fast zwei Wochen nach der Eroberung durch \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Aserbaidschan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -3035,7 +3069,7 @@ " Sebastian Ehm\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - " über die Situation vor Ort.</br></br>\n", + " über die Situation vor Ort.<br><br>\n", "<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " EU\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", @@ -3045,7 +3079,7 @@ " Moral\n", " <span style=\"font-size: 0.8em; font-weight: bold; 
line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", "</mark>\n", - "</br></br>Und die \n", + "<br><br>Und die \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Europäer\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", @@ -3075,7 +3109,7 @@ " Russland\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " als Lieferant ausfiel.</br></br>\n", + " als Lieferant ausfiel.<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Südkaukasus-Experte Röthig\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -3095,12 +3129,12 @@ " Aserbaidschan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ".</br></br>Doch es ist völlig unklar, ob das reicht, um \n", + ".<br><br>Doch es ist völlig unklar, ob das reicht, um \n", "<mark class=\"entity\" style=\"background: #ff9561; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Aserbaidschan\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - " etwas entgegenzusetzen, das zerbrochene Vertrauen der Armenier*innen zurückzugewinnen.</br></br>\n", + " etwas entgegenzusetzen, das zerbrochene Vertrauen der Armenier*innen zurückzugewinnen.<br><br>\n", "<mark class=\"entity\" style=\"background: #ddd; 
padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " Nina Niebergall\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">PER</span>\n", @@ -3120,7 +3154,7 @@ " Zentralasien\n", " <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">LOC</span>\n", "</mark>\n", - ".</br></br>aus: \n", + ".<br><br>aus: \n", "<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", " https://www.zdf.de/nachrichten/politik/bergkarabach-armenien-aserbaidschan-diplomatie-100.html\n", "\n", @@ -3150,7 +3184,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 41, "id": "9666a7f7-13f9-4988-a345-c3fcc50f88fd", "metadata": {}, "outputs": [], @@ -3185,7 +3219,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.12" } }, "nbformat": 4,