From 1cbe28eb49041b28ab2140a43e256c0fe7506c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=A4schke?= <jaeschke@l3s.de> Date: Thu, 22 Sep 2022 14:18:51 +0200 Subject: [PATCH] changed to fixed set of plays (to work around inconsistent TEI/XML) --- FCA.ipynb | 138 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 60 deletions(-) diff --git a/FCA.ipynb b/FCA.ipynb index a0b216d..89eb873 100644 --- a/FCA.ipynb +++ b/FCA.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "legislative-disclaimer", "metadata": {}, "outputs": [], @@ -34,36 +34,36 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "id": "regular-arthur", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'a0_s0': {'#iphigenie'},\n", - " 'a0_s1': {'#arkas', '#iphigenie'},\n", - " 'a0_s2': {'#iphigenie', '#thoas'},\n", - " 'a0_s3': {'#iphigenie'},\n", - " 'a1_s0': {'#orest', '#pylades'},\n", - " 'a1_s1': {'#iphigenie', '#pylades'},\n", - " 'a2_s0': {'#iphigenie', '#orest'},\n", - " 'a2_s1': {'#orest'},\n", - " 'a2_s2': {'#iphigenie', '#orest', '#pylades'},\n", - " 'a3_s0': {'#iphigenie'},\n", - " 'a3_s1': {'#arkas', '#iphigenie'},\n", - " 'a3_s2': {'#iphigenie'},\n", - " 'a3_s3': {'#iphigenie', '#pylades'},\n", - " 'a3_s4': {'#iphigenie'},\n", - " 'a4_s0': {'#arkas', '#thoas'},\n", - " 'a4_s1': {'#thoas'},\n", - " 'a4_s2': {'#iphigenie', '#thoas'},\n", - " 'a4_s3': {'#iphigenie', '#orest', '#thoas'},\n", - " 'a4_s4': {'#arkas', '#orest', '#pylades', '#thoas'},\n", - " 'a4_s5': {'#iphigenie', '#orest', '#thoas'}}" + "{'0/0': {'#iphigenie'},\n", + " '0/1': {'#arkas', '#iphigenie'},\n", + " '0/2': {'#iphigenie', '#thoas'},\n", + " '0/3': {'#iphigenie'},\n", + " '1/0': {'#orest', '#pylades'},\n", + " '1/1': {'#iphigenie', '#pylades'},\n", + " '2/0': {'#iphigenie', '#orest'},\n", + " '2/1': {'#orest'},\n", + " '2/2': {'#iphigenie', '#orest', '#pylades'},\n", + " '3/0': {'#iphigenie'},\n", + " '3/1': {'#arkas', '#iphigenie'},\n", + " '3/2': {'#iphigenie'},\n", + " '3/3': {'#iphigenie', '#pylades'},\n", + " '3/4': {'#iphigenie'},\n", + " '4/0': {'#arkas', '#thoas'},\n", + " '4/1': {'#thoas'},\n", + " '4/2': {'#iphigenie', '#thoas'},\n", + " '4/3': {'#iphigenie', '#orest', '#thoas'},\n", + " '4/4': {'#arkas', '#orest', '#pylades', '#thoas'},\n", + " '4/5': {'#iphigenie', '#orest', '#thoas'}}" ] }, - "execution_count": 8, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "id": "urban-alloy", "metadata": {}, "outputs": [], @@ -112,7 +112,7 @@ }, { "cell_type": "markdown", - "id": "durable-multimedia", + "id": "working-childhood", "metadata": {}, "source": [ "We use the [concepts](https://github.com/xflr6/concepts) Python library for FCA:" @@ -120,10 +120,20 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "ranking-island", + "execution_count": 3, + "id": "robust-potato", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: concepts in /home/rja/.local/lib/python3.9/site-packages (0.9.2)\r\n", + "Requirement already satisfied: graphviz~=0.7 in /home/rja/.local/lib/python3.9/site-packages (from concepts) (0.20.1)\r\n", + "Requirement already satisfied: bitsets~=0.7 in /home/rja/.local/lib/python3.9/site-packages (from concepts) (0.8.4)\r\n" + ] + } + ], "source": [ "!pip install concepts" ] @@ -138,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "id": "grave-hungary", "metadata": {}, "outputs": [ @@ -148,7 +158,7 @@ "17" ] }, - "execution_count": 10, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -170,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "id": "piano-practice", "metadata": {}, "outputs": [ @@ -207,7 +217,7 @@ "<g id=\"edge1\" class=\"edge\">\n", "<title>c1->c1</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M125.49,-105.78C134.53,-110.44 145.3,-108.51 145.3,-100 145.3,-91.49 134.53,-89.56 125.49,-94.22\"/>\n", - "<text text-anchor=\"middle\" x=\"119.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">a2_s2</text>\n", + "<text text-anchor=\"middle\" x=\"119.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">2/2</text>\n", "</g>\n", "<!-- c2 -->\n", "<g id=\"node3\" class=\"node\">\n", @@ -223,7 +233,7 @@ "<g id=\"edge3\" class=\"edge\">\n", "<title>c2->c2</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M206.49,-105.78C215.53,-110.44 226.3,-108.51 226.3,-100 226.3,-91.49 215.53,-89.56 206.49,-94.22\"/>\n", - "<text text-anchor=\"middle\" x=\"200.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">a4_s4</text>\n", + "<text text-anchor=\"middle\" x=\"200.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">4/4</text>\n", "</g>\n", "<!-- c3 -->\n", "<g id=\"node4\" class=\"node\">\n", @@ -239,7 +249,7 @@ "<g id=\"edge5\" class=\"edge\">\n", "<title>c3->c3</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M410.49,-196.78C419.53,-201.44 430.3,-199.51 430.3,-191 430.3,-182.49 419.53,-180.56 410.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"404.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a0_s1 a3_s1</text>\n", + "<text text-anchor=\"middle\" x=\"404.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">0/1 3/1</text>\n", "</g>\n", "<!-- c4 -->\n", "<g id=\"node5\" class=\"node\">\n", @@ -255,7 +265,7 @@ "<g id=\"edge7\" class=\"edge\">\n", "<title>c4->c4</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M353.49,-196.78C362.53,-201.44 373.3,-199.51 373.3,-191 373.3,-182.49 362.53,-180.56 353.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"347.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a4_s0</text>\n", + "<text text-anchor=\"middle\" x=\"347.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">4/0</text>\n", "</g>\n", "<!-- c5 -->\n", "<g id=\"node6\" class=\"node\">\n", @@ -271,7 +281,7 @@ "<g id=\"edge9\" class=\"edge\">\n", "<title>c5->c5</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M283.49,-105.78C292.53,-110.44 303.3,-108.51 303.3,-100 303.3,-91.49 292.53,-89.56 283.49,-94.22\"/>\n", - "<text text-anchor=\"middle\" x=\"277.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">a4_s3 a4_s5</text>\n", + "<text text-anchor=\"middle\" x=\"277.5\" y=\"-76.76\" font-family=\"Times,serif\" font-size=\"14.00\">4/3 4/5</text>\n", "</g>\n", "<!-- c6 -->\n", "<g id=\"node7\" class=\"node\">\n", @@ -292,7 +302,7 @@ "<g id=\"edge11\" class=\"edge\">\n", "<title>c6->c6</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M41.49,-196.78C50.53,-201.44 61.3,-199.51 61.3,-191 61.3,-182.49 50.53,-180.56 41.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"35.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a1_s0</text>\n", + "<text text-anchor=\"middle\" x=\"35.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">1/0</text>\n", "</g>\n", "<!-- c7 -->\n", "<g id=\"node8\" class=\"node\">\n", @@ -308,7 +318,7 @@ "<g id=\"edge14\" class=\"edge\">\n", "<title>c7->c7</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M106.49,-196.78C115.53,-201.44 126.3,-199.51 126.3,-191 126.3,-182.49 115.53,-180.56 106.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"100.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a1_s1 a3_s3</text>\n", + "<text text-anchor=\"middle\" x=\"100.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">1/1 3/3</text>\n", "</g>\n", "<!-- c8 -->\n", "<g id=\"node9\" class=\"node\">\n", @@ -360,7 +370,7 @@ "<g id=\"edge21\" class=\"edge\">\n", "<title>c10->c10</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M299.49,-196.78C308.53,-201.44 319.3,-199.51 319.3,-191 319.3,-182.49 308.53,-180.56 299.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"293.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a0_s2 a4_s2</text>\n", + "<text text-anchor=\"middle\" x=\"293.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">0/2 4/2</text>\n", "</g>\n", "<!-- c11 -->\n", "<g id=\"node12\" class=\"node\">\n", @@ -381,7 +391,7 @@ "<g id=\"edge23\" class=\"edge\">\n", "<title>c11->c11</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M230.49,-196.78C239.53,-201.44 250.3,-199.51 250.3,-191 250.3,-182.49 239.53,-180.56 230.49,-185.22\"/>\n", - "<text text-anchor=\"middle\" x=\"224.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">a2_s0</text>\n", + "<text text-anchor=\"middle\" x=\"224.5\" y=\"-167.76\" font-family=\"Times,serif\" font-size=\"14.00\">2/0</text>\n", "</g>\n", "<!-- c12 -->\n", "<g id=\"node13\" class=\"node\">\n", @@ -428,7 +438,7 @@ "<g id=\"edge29\" class=\"edge\">\n", "<title>c13->c13</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M238.87,-285.22C247.59,-287.11 257.3,-286.04 257.3,-282 257.3,-277.96 247.59,-276.89 238.87,-278.78\"/>\n", - "<text text-anchor=\"middle\" x=\"236.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">a4_s1</text>\n", + "<text text-anchor=\"middle\" x=\"236.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">4/1</text>\n", "</g>\n", "<!-- c13->c13 -->\n", "<g id=\"edge30\" class=\"edge\">\n", @@ -460,7 +470,7 @@ "<g id=\"edge34\" class=\"edge\">\n", "<title>c14->c14</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M126.87,-285.22C135.59,-287.11 145.3,-286.04 145.3,-282 145.3,-277.96 135.59,-276.89 126.87,-278.78\"/>\n", - "<text text-anchor=\"middle\" x=\"124.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">a2_s1</text>\n", + "<text text-anchor=\"middle\" x=\"124.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">2/1</text>\n", "</g>\n", "<!-- c14->c14 -->\n", "<g id=\"edge35\" class=\"edge\">\n", @@ -497,7 +507,7 @@ "<g id=\"edge39\" class=\"edge\">\n", "<title>c15->c15</title>\n", "<path fill=\"none\" stroke=\"transparent\" d=\"M310.87,-285.22C319.59,-287.11 329.3,-286.04 329.3,-282 329.3,-277.96 319.59,-276.89 310.87,-278.78\"/>\n", - "<text text-anchor=\"middle\" x=\"308.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">a0_s0 a0_s3 a3_s0 a3_s2 a3_s4</text>\n", + "<text text-anchor=\"middle\" x=\"308.19\" y=\"-260.32\" font-family=\"Times,serif\" font-size=\"14.00\">0/0 0/3 3/0 3/2 3/4</text>\n", "</g>\n", "<!-- c15->c15 -->\n", "<g id=\"edge40\" class=\"edge\">\n", @@ -539,10 +549,10 @@ "</svg>\n" ], "text/plain": [ - "<graphviz.graphs.Digraph at 0x7f3682f591f0>" + "<graphviz.graphs.Digraph at 0x7f05d9dd3d90>" ] }, - "execution_count": 11, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -561,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "id": "organic-cable", "metadata": {}, "outputs": [], @@ -579,21 +589,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, "id": "interior-steam", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Widget Javascript not detected. It may not be installed or enabled properly. Reconnecting the current kernel may help.\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b55fd6b921fe4355aea902208dc79e29" + "model_id": "54eab76066384427b9ce58b6504695e3" } }, "metadata": {}, @@ -603,14 +606,28 @@ "source": [ "from ipywidgets import interact, Dropdown\n", "import dracor\n", + "from IPython.core.display import display, HTML\n", + "\n", + "corpora = {\n", + " \"ger\" : [\n", + " \"goethe-egmont\", \"goethe-iphigenie-auf-tauris\", \"eichendorff-die-freier\",\n", + " \"haffner-die-fledermaus\", \"kleist-amphitryon\", \"nestroy-eulenspiegel\",\n", + " \"schiller-wilhelm-tell\", \"schiller-kabale-und-liebe\", \"schiller-die-raeuber\"\n", + " ],\n", + " \"shake\" : [\n", + " \"a-midsummer-night-s-dream\", \"macbeth\", \"the-merchant-of-venice\", \n", + " \"romeo-and-juliet\", \"othello\", \"much-ado-about-nothing\", \"king-lear\", \n", + " \"julius-caesar\"\n", + " ]\n", + "}\n", "\n", - "corpora = dracor.corpora()\n", + "# corpora = dracor.corpora()\n", "\n", - "corpus_widget = Dropdown(options=corpora, value='shake', description='Corpus:')\n", - "play_widget = Dropdown(options=dracor.plays(\"shake\"), value=\"a-midsummer-night-s-dream\", description='Play:')\n", + "corpus_widget = Dropdown(options=list(corpora.keys()), value='shake', description='Corpus:')\n", + "play_widget = Dropdown(options=corpora[\"shake\"], value=\"a-midsummer-night-s-dream\", description='Play:')\n", "\n", "def on_update_corpus_widget(*args):\n", - " play_widget.options = dracor.plays(corpus_widget.value)\n", + " play_widget.options = corpora[corpus_widget.value] # dracor.plays(corpus_widget.value)\n", "\n", "corpus_widget.observe(on_update_corpus_widget, 'value')\n", "\n", @@ -631,9 +648,10 @@ " \n", " ctx = concepts.Context(objects, attributes, incidence)\n", " \n", - " print(len(ctx.lattice), \"formal concepts\")\n", + " display(HTML(str(len(ctx.lattice)) + ' formal concepts (<a href=\"https://dracor.org/' + corpus + '/' + play + '\">play on DraCor</a>)'))\n", " ctx.tofile(play + \".cxt\", frmat=\"cxt\")\n", " \n", + " # print(ctx.lattice)\n", " return ctx.lattice.graphviz()" ] } -- GitLab