{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pipeline to create research dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from rdflib import Graph\n",
"\n",
"def merge_graphs(input_path_1, input_path_2, output_path):\n",
"    \"\"\"Combine two RDF files into one graph and write it out as Turtle.\n",
"\n",
"    Both inputs are parsed, in argument order, into a single rdflib Graph;\n",
"    the combined graph is then serialized to output_path with format 'ttl'.\n",
"    Parsing of both inputs finishes before serialization starts.\n",
"    \"\"\"\n",
"    merged = Graph()\n",
"    for source_path in (input_path_1, input_path_2):\n",
"        merged.parse(source_path)\n",
"    merged.serialize(destination=output_path, format='ttl')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/jovyan/dho-knowledge-graph-data-integration/src/rdf-mappings/map-tblBranch.py:463: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_tblBranch = pd.read_csv(csv_input_path, sep='\\t')\n",
" Column Column2 BranchID ... GpSel TinctChanged Column 201\n",
"0 0 NaN 1 ... NaN 1.0 NaN\n",
"1 1 NaN 2 ... NaN 1.0 NaN\n",
"2 2 NaN 3 ... NaN 1.0 NaN\n",
"3 3 NaN 4 ... NaN 1.0 NaN\n",
"4 4 NaN 5 ... NaN 1.0 NaN\n",
"... ... ... ... ... ... ... ...\n",
"40859 40859 NaN 8318_A_1 ... NaN NaN NaN\n",
"40860 40860 NaN 8425_1 ... 0.0 NaN 0.0\n",
"40861 40861 NaN 8425_A_1 ... NaN NaN NaN\n",
"40862 40862 NaN 8425_B_1 ... NaN NaN NaN\n",
"40863 40863 NaN 13770_1 ... 0.0 NaN 1.0\n",
"\n",
"[40864 rows x 205 columns]\n",
"chief\n",
"compon\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"eschutch\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"chief\n",
"shield\n",
"stag\n",
"compon de Harcourt\n",
"chief\n",
"chief\n",
"chief\n",
"\n",
"chief\n",
"border\n",
"lozenge\n",
"lozenge\n",
"\n",
"\n",
"chief\n",
"chief\n",
"lozenge\n",
"chief\n",
"chief\n",
"compon\n",
"compon\n",
"escuch\n",
"chief\n",
"\n",
"\n",
"escurch\n",
"banner\n",
"chief\n",
"banner\n",
"banner\n",
"banner\n",
"banner\n",
"banner\n",
"escuch\n",
"Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl\n",
"Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.jsonld\n",
"Export TBox to file...\n",
"Create data/rdf-output/digital-heraldry-ontology.ttl\n"
]
}
],
"source": [
"!python src/rdf-mappings/map-tblBranch.py"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"merge_graphs('data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl', 'data/ontologies/digital-heraldry-ontology-heraldry.ttl', 'data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add class structure for `dhoh:Modifier`s"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"merge_graphs(\n",
"    input_path_1='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
"    input_path_2='data/ontologies/research-dataset/modifier-class-structure.ttl',\n",
"    output_path='data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/jovyan/dho-knowledge-graph-data-integration/src/rdf-mappings/map-tblArmItems.py:79: DtypeWarning: Columns (0,7,10,11,13,14,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_tblArmItems = pd.read_csv(csv_input_path, sep='\\t')\n",
" Column ArmItemID ArmCode ItemNr ... CreMulti CreTint CreOrnIDR TGmember\n",
"0 18208.0 21653.0 A 58.0 ... NaN NaN NaN False\n",
"1 18227.0 21672.0 A 323.0 ... NaN NaN NaN False\n",
"2 18278.0 21727.0 A 160.0 ... NaN NaN NaN False\n",
"3 18333.0 21787.0 A 211.0 ... NaN NaN NaN False\n",
"4 18344.0 21798.0 A 82.0 ... NaN NaN NaN False\n",
"... ... ... ... ... ... ... ... ... ...\n",
"87832 33804.0 39070.0 ZUR 586.0 ... NaN NaN NaN False\n",
"87833 33805.0 39071.0 ZUR 587.0 ... NaN NaN NaN False\n",
"87834 4184.0 4306.0 BEL 360.0 ... NaN NaN NaN False\n",
"87835 11106-1 12011.0 BEL 1447.0 ... NaN NaN NaN False\n",
"87836 11214-1 12135.0 BEL 1513.0 ... NaN SO 2.0 False\n",
"\n",
"[87837 rows x 46 columns]\n",
"Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl\n",
"Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.jsonld\n"
]
}
],
"source": [
"!python src/rdf-mappings/map-tblArmItems.py"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"merge_graphs(\n",
"    input_path_1='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl',\n",
"    input_path_2='data/ontologies/digital-heraldry-ontology-representation.ttl',\n",
"    output_path='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Bestellen originDATA ... Zugriff_Internet.1 Herkunft\n",
"0 NaN TH ... NaN PdW_Datenbank\n",
"1 NaN Other ... NaN Summary\n",
Loading
Loading full blame...