From cd0fbe6d958f6f183833dddf840d172820b08729 Mon Sep 17 00:00:00 2001
From: Philipp Schneider <schneider.philipp@uni-muenster.de>
Date: Mon, 8 Aug 2022 17:55:44 +0200
Subject: [PATCH] Add functionality to integrate coa identifications from
 tblArmItems

---
 src/rdf-mappings/map-tblArmItems.py | 60 ++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/src/rdf-mappings/map-tblArmItems.py b/src/rdf-mappings/map-tblArmItems.py
index 071ab8f..15cc96e 100644
--- a/src/rdf-mappings/map-tblArmItems.py
+++ b/src/rdf-mappings/map-tblArmItems.py
@@ -10,6 +10,7 @@ from rdflib.namespace import RDF, RDFS, Namespace, OWL, DCTERMS, XSD
 import pandas as pd
 import uuid
 import json
+import re
 
 from dho_namespaces import *
 import functions
@@ -37,6 +38,32 @@ def add_data_to_description_event(description_event_node, coa_node, row, person_
     
     g.add((description_event_node, DCTERMS.creator, person_uri))
 
+def create_identified_entity(row):
+    """
+    Creates a node of type dhoe:IdentifiedEntity in the module-level graph `g` from the row's 'ArmIdf' value.
+    Returns the URI of that node, or None if 'ArmIdf' is null or does not start with an ASCII letter.
+    """
+
+    new_class_name = row['ArmIdf']
+
+    # Only identifications that start with an ASCII letter become entities; the null check runs first so NaN never reaches re.search
+    if not pd.isnull(row['ArmIdf']) and re.search("^[a-zA-Z]", new_class_name) is not None:
+        new_class_name = functions.clean_class_name_string(new_class_name)  # project helper, not shown in this patch
+        new_class_name = str(new_class_name).title()  # title-case every word ...
+        new_class_name = new_class_name.replace(' ', '')  # ... then drop the spaces to get the URI local name
+
+        new_entity_node = URIRef(dho_ent_n + new_class_name)  # URI derives from the cleaned name, so equal names map to the same node
+        g.add((new_entity_node, RDF.type, dho_ent_n.IdentifiedEntity))
+        g.add((new_entity_node, RDF.type, OWL.NamedIndividual))
+
+        g.add((new_entity_node, dho_ent_n.hasAppellation, Literal(row['ArmIdf'], datatype=XSD.string)))  # stores the raw, uncleaned 'ArmIdf' value
+
+        print(new_entity_node)  # writes the URI of every created entity to stdout
+
+        return new_entity_node
+    else:
+        return None
+
 # Load config file
 with open("config/config-map-tblArmItems.json", "r") as config_file:
     mapping_config = json.load(config_file)
@@ -59,8 +86,11 @@ g = Graph()
 g = bind_namespaces(g)
 
 # Load the existing Knowledge Graph and integrate the new data there, if stated so in the config file
-if mapping_config['existing_ontology'] is not None:
-    g.parse(mapping_config['existing_ontology'])
+if mapping_config['existing_representation_ontology'] is not None:  # key is looked up directly, so it must exist in the config; a JSON null skips loading
+    g.parse(mapping_config['existing_representation_ontology'])
+
+if mapping_config['existing_entity_ontology'] is not None:  # same rule; parsed into the same graph `g` as the representation ontology
+    g.parse(mapping_config['existing_entity_ontology'])
 
 # Define initial classes and properties if the respective option is set in the config file
 if mapping_config['initial_ontology_definitions'] != None:
@@ -75,6 +105,8 @@ for index, row in df_tblArmItems.iterrows():
     g.add((coa_reference_node, RDF.type, dho_rep_n.CoatOfArmsRepresentation))
     g.add((coa_reference_node, RDF.type, OWL.NamedIndividual))
 
+    g.add((coa_reference_node, dho_rep_n.hasOMAid, Literal(str(int(row['ArmItemID'])), datatype=XSD.string)))  # ID is normalised via int() and stored as xsd:string; NOTE(review): unlike 'ItemNr' below there is no null guard, so int() raises on a missing ArmItemID - confirm the column is never empty
+
     if not pd.isnull(row['ItemNr']):
         g.add((coa_reference_node, dho_rep_n.hasItemID, Literal(int(row['ItemNr']))))
 
@@ -122,8 +154,32 @@ for index, row in df_tblArmItems.iterrows():
     if not pd.isnull(row['description_time']):
         g.add((description_event_node, DCTERMS.date, Literal(str(row['description_time']), datatype=XSD.date)))
 
+    # Add textual transcription from source
     g.add((coa_reference_node, dho_rep_n.textHasPrimaryTranscription, Literal(str(row['ArmTxt']))))
 
+    # CoA identification
+    # Create node for identified entity from this row's 'ArmIdf'; None means there is no usable identification and nothing below is added
+    identified_entity = create_identified_entity(row)
+
+    if identified_entity is not None:
+        # Create identification event (URI uses a fresh random UUID, so each run of the script mints new event nodes)
+        identification_event_node = URIRef(dho_rep_n + str(uuid.uuid4()))  # NOTE(review): URI is minted in dho_rep_n although the type below comes from dho_ent_n - confirm this is intended
+        g.add((identification_event_node, RDF.type, dho_ent_n.CoAIdentificationAct))
+        g.add((identification_event_node, RDF.type, OWL.NamedIndividual))
+        g.add((coa_reference_node, dho_ent_n.hasIdentificationAct, identification_event_node))
+
+        g.add((identification_event_node, dho_ent_n.attributesCoATo, identified_entity))  # identification act -> entity
+        g.add((coa_reference_node, dho_ent_n.hasPrimaryIdentification, identified_entity))  # direct link: representation -> entity
+
+        # Link identification to Steen Clemmensen (since data transformation only concerns his coa identifications)
+        person_uri = URIRef(dho_data_n + 'Steen-Clemmensen')
+        g.add((identification_event_node, DCTERMS.creator, person_uri))
+
+        # Add user comment by Steen Clemmensen (only when the 'AiCmnt' column is not null)
+        if not pd.isnull(row['AiCmnt']):
+            g.add((identification_event_node, dho_ent_n.hasAnnotatorComment, Literal(row['AiCmnt'], datatype=XSD.string)))
+
+
     # VORGEHEN
     # ArmItem-ID als Legacy übernehmen
     # daraus (vermutlich) eine eindeutige Nachweis-ID machen (Schema dafür dann, wie mit TH besprochen)
-- 
GitLab