{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pipeline to create a small test dataset\n",
    "Goal: Check if changes to transformations scripts or to the ontologies apply correctly in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdflib import Graph\n",
    "\n",
    "def merge_graphs(input_path_1, input_path_2, output_path):\n",
    "    g = Graph()\n",
    "    g.parse(input_path_1)\n",
    "    g.parse(input_path_2)\n",
    "    g.serialize(destination=output_path, format='ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "src/rdf-mappings/map-tblBranch.py:502: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,60,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df_tblBranch = pd.read_csv(csv_input_path, sep='\\t')\n",
      "       Column  Column2  BranchID  ... GpSel TinctChanged  Column 201\n",
      "0           0      NaN         1  ...   NaN          1.0         NaN\n",
      "1           1      NaN         2  ...   NaN          1.0         NaN\n",
      "2           2      NaN         3  ...   NaN          1.0         NaN\n",
      "3           3      NaN         4  ...   NaN          1.0         NaN\n",
      "4           4      NaN         5  ...   NaN          1.0         NaN\n",
      "...       ...      ...       ...  ...   ...          ...         ...\n",
      "40859   40859      NaN  8318_A_1  ...   NaN          NaN         NaN\n",
      "40860   40860      NaN    8425_1  ...   0.0          NaN         0.0\n",
      "40861   40861      NaN  8425_A_1  ...   NaN          NaN         NaN\n",
      "40862   40862      NaN  8425_B_1  ...   NaN          NaN         NaN\n",
      "40863   40863      NaN   13770_1  ...   0.0          NaN         1.0\n",
      "\n",
      "[40864 rows x 205 columns]\n",
      "Created data/rdf-output/test-dataset/knowledge-graph-blazon-test-dataset.ttl\n",
      "Created data/rdf-output/test-dataset/knowledge-graph-blazon-test-dataset.jsonld\n",
      "Export TBox to file...\n",
      "Create data/rdf-output/digital-heraldry-ontology.ttl\n"
     ]
    }
   ],
   "source": [
    "!python src/rdf-mappings/map-tblBranch.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "merge_graphs('data/rdf-output/test-dataset/knowledge-graph-blazon-test-dataset.ttl', 'data/ontologies/digital-heraldry-ontology.ttl', 'data/rdf-output/test-dataset/knowledge-graph-blazon-test-dataset.ttl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "src/rdf-mappings/map-tblArmItems.py:149: FutureWarning: Inferring datetime64[ns] from data containing strings is deprecated and will be removed in a future version. To retain the old behavior explicitly pass Series(data, dtype=datetime64[ns])\n",
      "  df_metadata = pd.read_excel(metadata_file)\n",
      "      Column  ArmItemID ArmCode  ItemNr  ... CreMulti  CreTint  CreOrnIDR  TGmember\n",
      "0     3931.0     4052.0     BEL     1.0  ...      NaN      NaN        2.0     False\n",
      "1     3931-1     4052.0     BEL     1.0  ...      NaN      NaN        2.0     False\n",
      "2     3932.0     4053.0     BEL     2.0  ...      NaN      NaN        NaN     False\n",
      "3     3933.0     4054.0     BEL     3.0  ...      NaN      NaN        NaN     False\n",
      "4     3933-1     4054.0     BEL     3.0  ...      NaN      NaN        NaN     False\n",
      "..       ...        ...     ...     ...  ...      ...      ...        ...       ...\n",
      "809  35234.0    40702.0     NLU    76.0  ...      NaN        O        2.0     False\n",
      "810  43898.0    52699.0     NLU    77.0  ...      NaN        O        4.0     False\n",
      "811  43899.0    52700.0     NLU    78.0  ...      NaN        O        4.0     False\n",
      "812  43900.0    52701.0     NLU    79.0  ...      NaN        O        4.0     False\n",
      "813  43901.0    52702.0     NLU    80.0  ...      NaN        O        4.0     False\n",
      "\n",
      "[814 rows x 46 columns]\n",
      "src/rdf-mappings/map-tblArmItems.py:166: DtypeWarning: Columns (2,9,11,16,17,18,19,20,21,31,38,39,49,56,58,62,67,71,72,73,74,75,77,79,82,85,86,88,94,97,98,99,100,102,104,106,109,110,112,114,117,118,119,122,124,125,126,127,128,129,130,131,132,133,134,135,136,138,139,140,141,142,143,144,145,149,150,151,152,153,154,157,165,166,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,188,189,190,194,196) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df_tblBranch = pd.read_csv('data/input/tblBranch_research-dataset.tsv', sep='\\t')\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "BEL\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "MAN\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "NLU\n",
      "Created data/rdf-output/test-dataset/knowledge-graph-representation-test-dataset.ttl\n",
      "Created data/rdf-output/test-dataset/knowledge-graph-representation-test-dataset.jsonld\n"
     ]
    }
   ],
   "source": [
    "!python src/rdf-mappings/map-tblArmItems.py -t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "src/rdf-mappings/integrate_manuscript_metadata_into_kg.py:72: FutureWarning: Inferring datetime64[ns] from data containing strings is deprecated and will be removed in a future version. To retain the old behavior explicitly pass Series(data, dtype=datetime64[ns])\n",
      "  df_metadata = pd.read_excel(metadata_file)\n",
      "      dho-kg-id Bestellen  ... Zugriff_Internet.1 Herkunft\n",
      "16          NaN       NaN  ...                NaN  Summary\n",
      "270         NaN       NaN  ...                NaN  Summary\n",
      "281         NaN       NaN  ...                NaN  Summary\n",
      "509         NaN       NaN  ...                NaN  Summary\n",
      "602         NaN       NaN  ...                NaN  Summary\n",
      "607         NaN       NaN  ...                NaN  Summary\n",
      "926         NaN       NaN  ...                NaN  Summary\n",
      "1008        NaN       NaN  ...                NaN  Summary\n",
      "1199        NaN       NaN  ...                NaN  Summary\n",
      "\n",
      "[9 rows x 95 columns]\n",
      "Creating manuscript entities...\n",
      "Traceback (most recent call last):\n",
      "  File \"src/rdf-mappings/integrate_manuscript_metadata_into_kg.py\", line 105, in <module>\n",
      "    manuscript_identifier = str(int(functions.get_numerical_id_from_ms_metadata(df_metadata, row['ArmCode'])))\n",
      "ValueError: cannot convert float NaN to integer\n"
     ]
    }
   ],
   "source": [
    "!python src/rdf-mappings/integrate_manuscript_metadata_into_kg.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "http://172.28.34.103/!DOCTYPE html does not look like a valid URI, trying to serialize this will break.\n"
     ]
    },
    {
     "ename": "BadSyntax",
     "evalue": "at line 5 of <>:\nBad syntax (expected '.' or '}' or ']' at end of statement) at ^ in:\n\"b'<!DOCTYPE html>\\n<html>\\n\\n<head>\\n  '^b'<meta charset=\\'utf-8\\'>\\n  <link rel=\"icon\" href=\"/assets/no_a'...\"",
     "output_type": "error",
     "traceback": [
      "Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3398\u001b[0m in \u001b[1;35mrun_code\u001b[0m\n    exec(code_obj, self.user_global_ns, self.user_ns)\n",
      "  Input \u001b[1;32mIn [7]\u001b[0m in \u001b[1;35m<cell line: 19>\u001b[0m\n    resp = g.parse(endpoint, format='text/turtle')\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/graph.py:1267\u001b[0m in \u001b[1;35mparse\u001b[0m\n    raise se\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/graph.py:1258\u001b[0m in \u001b[1;35mparse\u001b[0m\n    parser.parse(source, self, **args)  # type: ignore[call-arg]\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:1913\u001b[0m in \u001b[1;35mparse\u001b[0m\n    p.loadStream(stream)\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:434\u001b[0m in \u001b[1;35mloadStream\u001b[0m\n    return self.loadBuf(stream.read())  # Not ideal\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:440\u001b[0m in \u001b[1;35mloadBuf\u001b[0m\n    self.feed(buf)\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:466\u001b[0m in \u001b[1;35mfeed\u001b[0m\n    i = self.directiveOrStatement(s, j)\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:488\u001b[0m in \u001b[1;35mdirectiveOrStatement\u001b[0m\n    return self.checkDot(argstr, j)\n",
      "  File \u001b[1;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:1149\u001b[0m in \u001b[1;35mcheckDot\u001b[0m\n    self.BadSyntax(argstr, j, \"expected '.' or '}' or ']' at end of statement\")\n",
      "\u001b[0;36m  File \u001b[0;32m~/opt/anaconda3/envs/dho_kg_integration/lib/python3.8/site-packages/rdflib/plugins/parsers/notation3.py:1646\u001b[0;36m in \u001b[0;35mBadSyntax\u001b[0;36m\u001b[0m\n\u001b[0;31m    raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg)\u001b[0m\n",
      "\u001b[0;36m  File \u001b[0;32m<string>\u001b[0;36m\u001b[0m\n\u001b[0;31mBadSyntax\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "from rdflib import Graph\n",
    "from rdflib.plugins.sparql import prepareQuery\n",
    "\n",
    "# Define the SPARQL query\n",
    "query = prepareQuery('''\n",
    "    SELECT ?subject ?value\n",
    "    WHERE {\n",
    "        ?subject <http://digitalheraldry.org/digital-heraldry-ontology/entity#attributesCoATo> ?value .\n",
    "    }\n",
    "    ''')\n",
    "\n",
    "# Define the endpoint URL\n",
    "endpoint = 'http://172.28.34.103/sparql'\n",
    "\n",
    "# Create a new RDF graph\n",
    "g = Graph()\n",
    "\n",
    "# Send a GET request to the SPARQL endpoint and parse the response as RDF/XML\n",
    "resp = g.parse(endpoint, format='text/turtle')\n",
    "\n",
    "# Check whether the parse method returned a valid RDF graph object\n",
    "if not resp:\n",
    "    raise ValueError('Unable to parse RDF data from the endpoint')\n",
    "\n",
    "# Bind the graph to the namespace\n",
    "g.bind('dh', 'http://digitalheraldry.org/digital-heraldry-ontology/entity#')\n",
    "\n",
    "# Send the SPARQL query to the endpoint and print the results\n",
    "for row in g.query(query):\n",
    "    print(row)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dho_kg_integration",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "86152e4984a8eeddffd673b8a40ea9a3bfe66b19f1d4e2fbcbd65936a43d9c06"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}