Skip to content
Snippets Groups Projects
research-dataset-creation-pipeline.ipynb 6.67 MiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pipeline to create research dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdflib import Graph\n",
    "\n",
    "def merge_graphs(input_path_1, input_path_2, output_path):\n",
    "    g = Graph()\n",
    "    g.parse(input_path_1)\n",
    "    g.parse(input_path_2)\n",
    "    g.serialize(destination=output_path, format='ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "src/rdf-mappings/map-tblBranch.py:381: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df_tblBranch = pd.read_csv(csv_input_path, sep='\\t')\n",
      "       Column  Column2  BranchID  ... GpSel TinctChanged  Column 201\n",
      "0           0      NaN         1  ...   NaN          1.0         NaN\n",
      "1           1      NaN         2  ...   NaN          1.0         NaN\n",
      "2           2      NaN         3  ...   NaN          1.0         NaN\n",
      "3           3      NaN         4  ...   NaN          1.0         NaN\n",
      "4           4      NaN         5  ...   NaN          1.0         NaN\n",
      "...       ...      ...       ...  ...   ...          ...         ...\n",
      "40859   40859      NaN  8318_A_1  ...   NaN          NaN         NaN\n",
      "40860   40860      NaN    8425_1  ...   0.0          NaN         0.0\n",
      "40861   40861      NaN  8425_A_1  ...   NaN          NaN         NaN\n",
      "40862   40862      NaN  8425_B_1  ...   NaN          NaN         NaN\n",
      "40863   40863      NaN   13770_1  ...   0.0          NaN         1.0\n",
      "\n",
      "[40864 rows x 205 columns]\n",
      "chief\n",
      "compon\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "eschutch\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "shield\n",
      "stag\n",
      "compon de Harcourt\n",
      "chief\n",
      "chief\n",
      "chief\n",
      "\n",
      "chief\n",
      "border\n",
      "lozenge\n",
      "lozenge\n",
      "\n",
      "\n",
      "chief\n",
      "chief\n",
      "lozenge\n",
      "chief\n",
      "chief\n",
      "compon\n",
      "compon\n",
      "escuch\n",
      "chief\n",
      "\n",
      "\n",
      "escurch\n",
      "banner\n",
      "chief\n",
      "banner\n",
      "banner\n",
      "banner\n",
      "banner\n",
      "banner\n",
      "escuch\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.jsonld\n",
      "Export TBox to file...\n",
      "Create data/rdf-output/digital-heraldry-ontology.ttl\n"
    "!python src/rdf-mappings/map-tblBranch.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl', 'data/ontologies/digital-heraldry-ontology.ttl', 'data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl', 'data/ontologies/research-dataset/modifier-class-structure.ttl', 'data/rdf-output/research-dataset/knowledge-graph-blazon-research-dataset.ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "src/rdf-mappings/map-tblArmItems.py:73: DtypeWarning: Columns (0,7,10,11,13,14,42) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df_tblArmItems = pd.read_csv(csv_input_path, sep='\\t')\n",
      "        Column  ArmItemID ArmCode  ItemNr  ... CreMulti  CreTint  CreOrnIDR TGmember\n",
      "0      18208.0    21653.0       A    58.0  ...      NaN      NaN        NaN    False\n",
      "1      18227.0    21672.0       A   323.0  ...      NaN      NaN        NaN    False\n",
      "2      18278.0    21727.0       A   160.0  ...      NaN      NaN        NaN    False\n",
      "3      18333.0    21787.0       A   211.0  ...      NaN      NaN        NaN    False\n",
      "4      18344.0    21798.0       A    82.0  ...      NaN      NaN        NaN    False\n",
      "...        ...        ...     ...     ...  ...      ...      ...        ...      ...\n",
      "87832  33804.0    39070.0     ZUR   586.0  ...      NaN      NaN        NaN    False\n",
      "87833  33805.0    39071.0     ZUR   587.0  ...      NaN      NaN        NaN    False\n",
      "87834   4184.0     4306.0     BEL   360.0  ...      NaN      NaN        NaN    False\n",
      "87835  11106-1    12011.0     BEL  1447.0  ...      NaN      NaN        NaN    False\n",
      "87836  11214-1    12135.0     BEL  1513.0  ...      NaN       SO        2.0    False\n",
      "\n",
      "[87837 rows x 46 columns]\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.jsonld\n"
    "!python src/rdf-mappings/map-tblArmItems.py"
   ]
  },
  {
   "cell_type": "code",
Philipp Schneider's avatar
Philipp Schneider committed
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "merge_graphs('data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl' , 'data/ontologies/digital-heraldry-ontology-representation.ttl', 'data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    ArmCode  CoACount  ... armorial-position-in-ms grid-system\n",
      "0         A     324.0  ...                     NaN         NaN\n",
      "1       ABL       NaN  ...                     NaN         NaN\n",
      "2       ABR       NaN  ...                     NaN         NaN\n",
      "3       ACA       NaN  ...                     NaN         NaN\n",
      "4       ACS       NaN  ...                     NaN         NaN\n",
      "771     NaN       NaN  ...                     NaN         NaN\n",
      "772     NaN       NaN  ...                     NaN         NaN\n",
      "773     NaN       NaN  ...                     NaN         NaN\n",
      "774     NaN       NaN  ...                     NaN         NaN\n",
      "775     NaN       NaN  ...                     NaN         NaN\n",
      "\n",
      "[776 rows x 36 columns]\n",
      "Creating manuscript entities...\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.ttl\n",
      "Created data/rdf-output/research-dataset/knowledge-graph-objects-research-dataset.jsonld\n",
      "Loading Knowledge Graph file...\n",
      "Linking coat of arms representations to manuscript entities...\n",
      "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
      "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
      "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
      "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
      "http://digitalheraldry.org/digital-heraldry-ontology/objects#MSMSA\n",
Loading
Loading full blame...