diff --git a/research-dataset-creation-pipeline.ipynb b/research-dataset-creation-pipeline.ipynb index 5217412581637e75c312b24086df13d214787e06..0c044ec0409fe707257bfccfb9477cc62b118674 100644 --- a/research-dataset-creation-pipeline.ipynb +++ b/research-dataset-creation-pipeline.ipynb @@ -9,47 +9,165 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from rdflib import Graph\n", + "\n", + "def merge_graphs(input_path_1, input_path_2, output_path):\n", + " g = Graph()\n", + " g.parse(input_path_1)\n", + " g.parse(input_path_2)\n", + " g.serialize(destination=output_path, format='ttl')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Merging data/rdf-output/research-dataset/coat-of-arms-descriptions-research-dataset.ttl into Knowledge Graph\n", - "Merging data/rdf-output/research-dataset/coat-of-arms-manuscript-provenances-research-dataset.ttl into Knowledge Graph\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.jsonld\n" + "src/rdf-mappings/map-tblBranch.py:381: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_tblBranch = pd.read_csv(csv_input_path, sep='\\t')\n", + " Column Column2 BranchID ... GpSel TinctChanged Column 201\n", + "0 0 NaN 1 ... NaN 1.0 NaN\n", + "1 1 NaN 2 ... NaN 1.0 NaN\n", + "2 2 NaN 3 ... NaN 1.0 NaN\n", + "3 3 NaN 4 ... NaN 1.0 NaN\n", + "4 4 NaN 5 ... NaN 1.0 NaN\n", + "... ... ... ... ... ... ... ...\n", + "40859 40859 NaN 8318_A_1 ... NaN NaN NaN\n", + "40860 40860 NaN 8425_1 ... 0.0 NaN 0.0\n", + "40861 40861 NaN 8425_A_1 ... NaN NaN NaN\n", + "40862 40862 NaN 8425_B_1 ... NaN NaN NaN\n", + "40863 40863 NaN 13770_1 ... 0.0 NaN 1.0\n", + "\n", + "[40864 rows x 205 columns]\n", + "chief\n", + "compon\n", + "chief\n", + "chief\n", + "chief\n", + "chief\n", + "chief\n", + "eschutch\n", + "chief\n", + "chief\n", + "chief\n", + "chief\n", + "chief\n", + "shield\n", + "stag\n", + "compon de Harcourt\n", + "chief\n", + "chief\n", + "chief\n", + "\n", + "chief\n", + "border\n", + "lozenge\n", + "lozenge\n", + "\n", + "\n", + "chief\n", + "chief\n", + "lozenge\n", + "chief\n", + "chief\n", + "compon\n", + "compon\n", + "escuch\n", + "chief\n", + "\n", + "\n", + "escurch\n", + "banner\n", + "chief\n", + "banner\n", + "banner\n", + "banner\n", + "banner\n", + "banner\n", + "escuch\n", + "Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl\n", + "Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.jsonld\n", + "Export TBox to file...\n", + "Create data/rdf-output/digital-heraldry-ontology.ttl\n" ] } ], "source": [ - "!python src/rdf-mappings/merge_rdf_files_into_kg.py -i data/rdf-output/research-dataset/coat-of-arms-descriptions-research-dataset.ttl data/rdf-output/research-dataset/coat-of-arms-manuscript-provenances-research-dataset.ttl" + "!python src/rdf-mappings/map-tblBranch.py" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl', 'data/ontologies/digital-heraldry-ontology.ttl', 'data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl', 'data/ontologies/research-dataset/modifier-class-structure.ttl', 'data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Merging data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl into Knowledge Graph\n", - "Merging data/ontologies/research-dataset/modifier-class-structure.ttl into Knowledge Graph\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.jsonld\n" + "src/rdf-mappings/map-tblArmItems.py:73: DtypeWarning: Columns (0,7,10,11,13,14,42) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_tblArmItems = pd.read_csv(csv_input_path, sep='\\t')\n", + " Column ArmItemID ArmCode ItemNr ... CreMulti CreTint CreOrnIDR TGmember\n", + "0 18208.0 21653.0 A 58.0 ... NaN NaN NaN False\n", + "1 18227.0 21672.0 A 323.0 ... NaN NaN NaN False\n", + "2 18278.0 21727.0 A 160.0 ... NaN NaN NaN False\n", + "3 18333.0 21787.0 A 211.0 ... NaN NaN NaN False\n", + "4 18344.0 21798.0 A 82.0 ... NaN NaN NaN False\n", + "... ... ... ... ... ... ... ... ... ...\n", + "87832 33804.0 39070.0 ZUR 586.0 ... NaN NaN NaN False\n", + "87833 33805.0 39071.0 ZUR 587.0 ... NaN NaN NaN False\n", + "87834 4184.0 4306.0 BEL 360.0 ... NaN NaN NaN False\n", + "87835 11106-1 12011.0 BEL 1447.0 ... NaN NaN NaN False\n", + "87836 11214-1 12135.0 BEL 1513.0 ... NaN SO 2.0 False\n", + "\n", + "[87837 rows x 46 columns]\n", + "Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl\n", + "Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.jsonld\n" ] } ], "source": [ - "!python src/rdf-mappings/merge_rdf_files_into_kg.py -i data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl data/ontologies/research-dataset/modifier-class-structure.ttl" + "!python src/rdf-mappings/map-tblArmItems.py" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl' , 'data/ontologies/digital-heraldry-ontology-representation.ttl', 'data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl')" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -57,20 +175,22 @@ "output_type": "stream", "text": [ " ArmCode CoACount ... armorial-position-in-ms grid-system\n", - "0 LBQ 3907.0 ... NaN NaN\n", - "1 RUG 3568.0 ... NaN NaN\n", - "2 BHM 3398.0 ... NaN NaN\n", - "3 URF 2856.0 ... NaN NaN\n", - "4 LYN 2829.0 ... NaN NaN\n", + "0 A 324.0 ... NaN NaN\n", + "1 ABL NaN ... NaN NaN\n", + "2 ABR NaN ... NaN NaN\n", + "3 ACA NaN ... NaN NaN\n", + "4 ACS NaN ... NaN NaN\n", ".. ... ... ... ... ...\n", - "771 WMN NaN ... NaN NaN\n", + "771 NaN NaN ... NaN NaN\n", "772 NaN NaN ... NaN NaN\n", "773 NaN NaN ... NaN NaN\n", "774 NaN NaN ... NaN NaN\n", "775 NaN NaN ... NaN NaN\n", "\n", - "[776 rows x 34 columns]\n", + "[776 rows x 36 columns]\n", "Creating manuscript entities...\n", + "Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl\n", + "Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.jsonld\n", "Loading Knowledge Graph file...\n", "Linking coat of arms representations to manuscript entities...\n", "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n", @@ -87711,8 +87831,7 @@ "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSsc\n", "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSwj\n", "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSwln\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.jsonld\n" + "Serialilzing representation graph...\n" ] } ], @@ -87722,42 +87841,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 32, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Merging data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl into Knowledge Graph\n", - "Merging data/ontologies/digital-heraldry-ontology-representation.ttl into Knowledge Graph\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.jsonld\n" - ] - } - ], + "outputs": [], "source": [ - "!python src/rdf-mappings/merge_rdf_files_into_kg.py -i data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl data/ontologies/digital-heraldry-ontology-representation.ttl" + "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl', 'data/ontologies/digital-heraldry-ontology-objects.ttl', 'data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl')" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Merging data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl into Knowledge Graph\n", - "Merging data/ontologies/digital-heraldry-ontology-objects.ttl into Knowledge Graph\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl\n", - "Created data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.jsonld\n" - ] - } - ], + "outputs": [], "source": [ - "!python src/rdf-mappings/merge_rdf_files_into_kg.py -i data/rdf-output/research-dataset/digital-heraldry-knowledge-graph-research-dataset.ttl data/ontologies/digital-heraldry-ontology-objects.ttl" + "!python src/rdf-mappings/merge_rdf_files_into_kg.py -i data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl" ] } ],