{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pipeline to create research dataset"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from rdflib import Graph\n",
"\n",
"def merge_graphs(input_path_1, input_path_2, output_path):\n",
"    \"\"\"Merge two RDF files and write the combined graph as Turtle.\n",
"\n",
"    Both inputs are parsed into one in-memory graph before serialization\n",
"    starts, so ``output_path`` may name the same file as one of the inputs\n",
"    (the calls in this notebook use that to update a file in place).\n",
"\n",
"    Args:\n",
"        input_path_1: Path of the first RDF file to parse.\n",
"        input_path_2: Path of the second RDF file to parse.\n",
"        output_path: Destination file, written with ``format='ttl'``.\n",
"    \"\"\"\n",
"    merged = Graph()\n",
"    # Parsing into the same Graph accumulates the triples of every source.\n",
"    for source_path in (input_path_1, input_path_2):\n",
"        merged.parse(source_path)\n",
"    merged.serialize(destination=output_path, format='ttl')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"src/rdf-mappings/map-tblBranch.py:381: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_tblBranch = pd.read_csv(csv_input_path, sep='\\t')\n",
" Column Column2 BranchID ... GpSel TinctChanged Column 201\n",
"0 0 NaN 1 ... NaN 1.0 NaN\n",
"1 1 NaN 2 ... NaN 1.0 NaN\n",
"2 2 NaN 3 ... NaN 1.0 NaN\n",
"3 3 NaN 4 ... NaN 1.0 NaN\n",
"4 4 NaN 5 ... NaN 1.0 NaN\n",
"... ... ... ... ... ... ... ...\n",
"40859 40859 NaN 8318_A_1 ... NaN NaN NaN\n",
"40860 40860 NaN 8425_1 ... 0.0 NaN 0.0\n",
"40861 40861 NaN 8425_A_1 ... NaN NaN NaN\n",
"40862 40862 NaN 8425_B_1 ... NaN NaN NaN\n",
"40863 40863 NaN 13770_1 ... 0.0 NaN 1.0\n",
"\n",
"[40864 rows x 205 columns]\n",
"chief\n",
"compon\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"eschutch\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"shield\n",
"stag\n",
"compon de Harcourt\n",
"chief\n",
"chief\n",
"chief\n",
"\n",
"chief\n",
"border\n",
"lozenge\n",
"lozenge\n",
"\n",
"\n",
"chief\n",
"chief\n",
"lozenge\n",
"chief\n",
"chief\n",
"compon\n",
"compon\n",
"escuch\n",
"chief\n",
"\n",
"\n",
"escurch\n",
"banner\n",
"chief\n",
"banner\n",
"banner\n",
"banner\n",
"banner\n",
"banner\n",
"escuch\n",
"Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl\n",
"Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.jsonld\n",
"Export TBox to file...\n",
"Create data/rdf-output/digital-heraldry-ontology.ttl\n"
]
}
],
"source": [
"!python src/rdf-mappings/map-tblBranch.py"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Fold the ontology into the blazon knowledge graph. The output path equals\n",
"# the first input, so the knowledge-graph file is overwritten in place.\n",
"# NOTE(review): the mapping script's log reports exporting the TBox to\n",
"# data/rdf-output/digital-heraldry-ontology.ttl, while this merge reads\n",
"# data/ontologies/digital-heraldry-ontology.ttl - confirm which is intended.\n",
"merge_graphs(\n",
"    input_path_1='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
"    input_path_2='data/ontologies/digital-heraldry-ontology.ttl',\n",
"    output_path='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Add the modifier class structure to the blazon knowledge graph. The output\n",
"# path equals the first input, so the file is overwritten in place.\n",
"merge_graphs(\n",
"    input_path_1='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
"    input_path_2='data/ontologies/research-dataset/modifier-class-structure.ttl',\n",
"    output_path='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
")"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"src/rdf-mappings/map-tblArmItems.py:73: DtypeWarning: Columns (0,7,10,11,13,14,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_tblArmItems = pd.read_csv(csv_input_path, sep='\\t')\n",
" Column ArmItemID ArmCode ItemNr ... CreMulti CreTint CreOrnIDR TGmember\n",
"0 18208.0 21653.0 A 58.0 ... NaN NaN NaN False\n",
"1 18227.0 21672.0 A 323.0 ... NaN NaN NaN False\n",
"2 18278.0 21727.0 A 160.0 ... NaN NaN NaN False\n",
"3 18333.0 21787.0 A 211.0 ... NaN NaN NaN False\n",
"4 18344.0 21798.0 A 82.0 ... NaN NaN NaN False\n",
"... ... ... ... ... ... ... ... ... ...\n",
"87832 33804.0 39070.0 ZUR 586.0 ... NaN NaN NaN False\n",
"87833 33805.0 39071.0 ZUR 587.0 ... NaN NaN NaN False\n",
"87834 4184.0 4306.0 BEL 360.0 ... NaN NaN NaN False\n",
"87835 11106-1 12011.0 BEL 1447.0 ... NaN NaN NaN False\n",
"87836 11214-1 12135.0 BEL 1513.0 ... NaN SO 2.0 False\n",
"\n",
"[87837 rows x 46 columns]\n",
"Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl\n",
"Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.jsonld\n"
]
}
],
"source": [
"!python src/rdf-mappings/map-tblArmItems.py"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"# Fold the representation ontology into the representation knowledge graph.\n",
"# The output path equals the first input, so the file is overwritten in place.\n",
"merge_graphs(\n",
"    input_path_1='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl',\n",
"    input_path_2='data/ontologies/digital-heraldry-ontology-representation.ttl',\n",
"    output_path='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl',\n",
")"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ArmCode CoACount ... armorial-position-in-ms grid-system\n",
"0 A 324.0 ... NaN NaN\n",
"1 ABL NaN ... NaN NaN\n",
"2 ABR NaN ... NaN NaN\n",
"3 ACA NaN ... NaN NaN\n",
"4 ACS NaN ... NaN NaN\n",
".. ... ... ... ... ...\n",
"772 NaN NaN ... NaN NaN\n",
"773 NaN NaN ... NaN NaN\n",
"774 NaN NaN ... NaN NaN\n",
"775 NaN NaN ... NaN NaN\n",
"\n",
"Creating manuscript entities...\n",
"Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl\n",
"Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.jsonld\n",
"Loading Knowledge Graph file...\n",
"Linking coat of arms representations to manuscript entities...\n",
"http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
"http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
"http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
"http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
"http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
Loading
Loading full blame...