From cd0fbe6d958f6f183833dddf840d172820b08729 Mon Sep 17 00:00:00 2001
From: Philipp Schneider <schneider.philipp@uni-muenster.de>
Date: Mon, 8 Aug 2022 17:55:44 +0200
Subject: [PATCH] Add functionality to integrate coa identifications from tblArmItems

---
Review notes (commentary only; text between the three-dash line and the first
"diff --git" line is not part of the change):

* Layout: this copy of the patch had its whitespace collapsed. Line breaks,
  indentation and the position of blank context lines below were reconstructed
  from the hunk headers and from Python syntax.
  TODO: verify against the original commit before applying.
* Hunk 1 adds "import re".
* Hunk 2 adds create_identified_entity(row). When row['ArmIdf'] is not null and
  starts with an ASCII letter, the value is passed through
  functions.clean_class_name_string, title-cased and stripped of spaces, and
  used as the local name of a URIRef under dho_ent_n. The node is typed as
  dho_ent_n.IdentifiedEntity and OWL.NamedIndividual and receives the raw
  ArmIdf value as dho_ent_n.hasAppellation. The function returns that node,
  otherwise None.
* Hunk 3 replaces the config key 'existing_ontology' with the two keys
  'existing_representation_ontology' and 'existing_entity_ontology'; each is
  parsed into the graph when its value is not None.
* Hunk 4 adds a dho_rep_n.hasOMAid literal built from row['ArmItemID'].
* Hunk 5 calls create_identified_entity(row). When a node is returned it
  creates a dho_ent_n.CoAIdentificationAct node, links it to the coat-of-arms
  node and to the identified entity, sets DCTERMS.creator to the
  'Steen-Clemmensen' URI and, when row['AiCmnt'] is not null, adds it as
  dho_ent_n.hasAnnotatorComment.
* NOTE(review): create_identified_entity still contains a
  print(new_entity_node) call; this looks like leftover debug output.
* NOTE(review): both new config keys are read with a plain subscript, so
  existing config files presumably need both keys present (a null value is
  accepted). Confirm config-map-tblArmItems.json was updated accordingly.
* NOTE(review): int(row['ArmItemID']) is not guarded by pd.isnull the way
  row['ItemNr'] is; presumably ArmItemID is always populated. Confirm.
* NOTE(review): the identification act URI is minted under dho_rep_n while its
  type and properties come from dho_ent_n. Confirm this is intended.
* The trailing German context comments read, in English: "PROCEDURE / carry
  over the ArmItem ID as legacy / (probably) derive a unique reference ID from
  it (scheme for that as discussed with TH)". They are context lines and must
  stay verbatim for the patch to match the target file.

 src/rdf-mappings/map-tblArmItems.py | 60 ++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/src/rdf-mappings/map-tblArmItems.py b/src/rdf-mappings/map-tblArmItems.py
index 071ab8f..15cc96e 100644
--- a/src/rdf-mappings/map-tblArmItems.py
+++ b/src/rdf-mappings/map-tblArmItems.py
@@ -10,6 +10,7 @@ from rdflib.namespace import RDF, RDFS, Namespace, OWL, DCTERMS, XSD
 import pandas as pd
 import uuid
 import json
+import re
 
 from dho_namespaces import *
 import functions
@@ -37,6 +38,32 @@ def add_data_to_description_event(description_event_node, coa_node, row, person_
     g.add((description_event_node, DCTERMS.creator, person_uri))
 
 
+def create_identified_entity(row):
+    """
+    Creates a node of type dhoe:IdentifiedEntity from the identification provided in tblArmItems.
+    Returns the URI of the newly created node.
+    """
+
+    new_class_name = row['ArmIdf']
+
+    # Check if string starts with a letter
+    if not pd.isnull(row['ArmIdf']) and re.search("^[a-zA-Z]", new_class_name) is not None:
+        new_class_name = functions.clean_class_name_string(new_class_name)
+        new_class_name = str(new_class_name).title()
+        new_class_name = new_class_name.replace(' ', '')
+
+        new_entity_node = URIRef(dho_ent_n + new_class_name)
+        g.add((new_entity_node, RDF.type, dho_ent_n.IdentifiedEntity))
+        g.add((new_entity_node, RDF.type, OWL.NamedIndividual))
+
+        g.add((new_entity_node, dho_ent_n.hasAppellation, Literal(row['ArmIdf'], datatype=XSD.string)))
+
+        print(new_entity_node)
+
+        return new_entity_node
+    else:
+        return None
+
 # Load config file
 with open("config/config-map-tblArmItems.json", "r") as config_file:
     mapping_config = json.load(config_file)
@@ -59,8 +86,11 @@ g = Graph()
 g = bind_namespaces(g)
 
 # Load the existing Knowledge Graph and integrate the new data there, if stated so in the config file
-if mapping_config['existing_ontology'] is not None:
-    g.parse(mapping_config['existing_ontology'])
+if mapping_config['existing_representation_ontology'] is not None:
+    g.parse(mapping_config['existing_representation_ontology'])
+
+if mapping_config['existing_entity_ontology'] is not None:
+    g.parse(mapping_config['existing_entity_ontology'])
 
 # Define initial classes and properties if the respective option is set in the config file
 if mapping_config['initial_ontology_definitions'] != None:
@@ -75,6 +105,8 @@ for index, row in df_tblArmItems.iterrows():
     g.add((coa_reference_node, RDF.type, dho_rep_n.CoatOfArmsRepresentation))
     g.add((coa_reference_node, RDF.type, OWL.NamedIndividual))
 
+    g.add((coa_reference_node, dho_rep_n.hasOMAid, Literal(str(int(row['ArmItemID'])), datatype=XSD.string)))
+
     if not pd.isnull(row['ItemNr']):
         g.add((coa_reference_node, dho_rep_n.hasItemID, Literal(int(row['ItemNr']))))
 
@@ -122,8 +154,32 @@ for index, row in df_tblArmItems.iterrows():
     if not pd.isnull(row['description_time']):
         g.add((description_event_node, DCTERMS.date, Literal(str(row['description_time']), datatype=XSD.date)))
 
+    # Add textual transcription from source
     g.add((coa_reference_node, dho_rep_n.textHasPrimaryTranscription, Literal(str(row['ArmTxt']))))
 
+    # CoA identification
+    # Create node for identified entity
+    identified_entity = create_identified_entity(row)
+
+    if identified_entity is not None:
+        # Create identification event
+        identification_event_node = URIRef(dho_rep_n + str(uuid.uuid4()))
+        g.add((identification_event_node, RDF.type, dho_ent_n.CoAIdentificationAct))
+        g.add((identification_event_node, RDF.type, OWL.NamedIndividual))
+        g.add((coa_reference_node, dho_ent_n.hasIdentificationAct, identification_event_node))
+
+        g.add((identification_event_node, dho_ent_n.attributesCoATo, identified_entity))
+        g.add((coa_reference_node, dho_ent_n.hasPrimaryIdentification, identified_entity))
+
+        # Link identification to Steen Clemmensen (since data transformation only concerns his coa identifications)
+        person_uri = URIRef(dho_data_n + 'Steen-Clemmensen')
+        g.add((identification_event_node, DCTERMS.creator, person_uri))
+
+        # Add user comment by Steen Clemmensen
+        if not pd.isnull(row['AiCmnt']):
+            g.add((identification_event_node, dho_ent_n.hasAnnotatorComment, Literal(row['AiCmnt'], datatype=XSD.string)))
+
+
 # VORGEHEN
 # ArmItem-ID als Legacy übernehmen
 # daraus (vermutlich) eine eindeutige Nachweis-ID machen (Schema dafür dann, wie mit TH besprochen)
-- 
GitLab