Skip to content
Snippets Groups Projects
Commit cd0fbe6d authored by Philipp Schneider's avatar Philipp Schneider
Browse files

Add functionality to integrate coa identifications from tblArmItems

parent 5aa17552
No related branches found
No related tags found
No related merge requests found
...@@ -10,6 +10,7 @@ from rdflib.namespace import RDF, RDFS, Namespace, OWL, DCTERMS, XSD ...@@ -10,6 +10,7 @@ from rdflib.namespace import RDF, RDFS, Namespace, OWL, DCTERMS, XSD
import pandas as pd import pandas as pd
import uuid import uuid
import json import json
import re
from dho_namespaces import * from dho_namespaces import *
import functions import functions
...@@ -37,6 +38,32 @@ def add_data_to_description_event(description_event_node, coa_node, row, person_ ...@@ -37,6 +38,32 @@ def add_data_to_description_event(description_event_node, coa_node, row, person_
g.add((description_event_node, DCTERMS.creator, person_uri)) g.add((description_event_node, DCTERMS.creator, person_uri))
def create_identified_entity(row):
    """
    Create a node of type dhoe:IdentifiedEntity from the identification in tblArmItems.

    The value of ``row['ArmIdf']`` is used twice: cleaned (via
    ``functions.clean_class_name_string``), title-cased and stripped of
    spaces, it becomes the local name of the new URI in the ``dho_ent_n``
    namespace; unchanged, it is attached to the node as its appellation.

    Args:
        row: Record with an 'ArmIdf' entry (e.g. a row yielded by
            ``DataFrame.iterrows()``).

    Returns:
        The URI of the node added to the module-level graph ``g``, or None
        when 'ArmIdf' is null, is not a string, or does not start with an
        ASCII letter.
    """
    appellation = row['ArmIdf']

    # Null markers (None/NaN) and any other non-string cell cannot be turned
    # into a class name. Rejecting them up front also keeps re.search from
    # raising a TypeError on e.g. numeric cells.
    if not isinstance(appellation, str):
        return None

    # Only identifications that start with a letter are turned into entities
    if re.search(r"^[a-zA-Z]", appellation) is None:
        return None

    # Build the local part of the URI: cleaned, Title Cased, without spaces
    new_class_name = functions.clean_class_name_string(appellation)
    new_class_name = str(new_class_name).title().replace(' ', '')

    new_entity_node = URIRef(dho_ent_n + new_class_name)
    g.add((new_entity_node, RDF.type, dho_ent_n.IdentifiedEntity))
    g.add((new_entity_node, RDF.type, OWL.NamedIndividual))
    # The appellation keeps the original, uncleaned identification string
    g.add((new_entity_node, dho_ent_n.hasAppellation, Literal(appellation, datatype=XSD.string)))
    print(new_entity_node)
    return new_entity_node
# Load config file # Load config file
with open("config/config-map-tblArmItems.json", "r") as config_file: with open("config/config-map-tblArmItems.json", "r") as config_file:
mapping_config = json.load(config_file) mapping_config = json.load(config_file)
...@@ -59,8 +86,11 @@ g = Graph() ...@@ -59,8 +86,11 @@ g = Graph()
g = bind_namespaces(g) g = bind_namespaces(g)
# Load the existing Knowledge Graph and integrate the new data there, if stated so in the config file # Load the existing Knowledge Graph and integrate the new data there, if stated so in the config file
if mapping_config['existing_ontology'] is not None: if mapping_config['existing_representation_ontology'] is not None:
g.parse(mapping_config['existing_ontology']) g.parse(mapping_config['existing_representation_ontology'])
if mapping_config['existing_entity_ontology'] is not None:
g.parse(mapping_config['existing_entity_ontology'])
# Define initial classes and properties if the respective option is set in the config file # Define initial classes and properties if the respective option is set in the config file
if mapping_config['initial_ontology_definitions'] != None: if mapping_config['initial_ontology_definitions'] != None:
...@@ -75,6 +105,8 @@ for index, row in df_tblArmItems.iterrows(): ...@@ -75,6 +105,8 @@ for index, row in df_tblArmItems.iterrows():
g.add((coa_reference_node, RDF.type, dho_rep_n.CoatOfArmsRepresentation)) g.add((coa_reference_node, RDF.type, dho_rep_n.CoatOfArmsRepresentation))
g.add((coa_reference_node, RDF.type, OWL.NamedIndividual)) g.add((coa_reference_node, RDF.type, OWL.NamedIndividual))
g.add((coa_reference_node, dho_rep_n.hasOMAid, Literal(str(int(row['ArmItemID'])), datatype=XSD.string)))
if not pd.isnull(row['ItemNr']): if not pd.isnull(row['ItemNr']):
g.add((coa_reference_node, dho_rep_n.hasItemID, Literal(int(row['ItemNr'])))) g.add((coa_reference_node, dho_rep_n.hasItemID, Literal(int(row['ItemNr']))))
...@@ -122,8 +154,32 @@ for index, row in df_tblArmItems.iterrows(): ...@@ -122,8 +154,32 @@ for index, row in df_tblArmItems.iterrows():
if not pd.isnull(row['description_time']): if not pd.isnull(row['description_time']):
g.add((description_event_node, DCTERMS.date, Literal(str(row['description_time']), datatype=XSD.date))) g.add((description_event_node, DCTERMS.date, Literal(str(row['description_time']), datatype=XSD.date)))
# Add textual transcription from source
g.add((coa_reference_node, dho_rep_n.textHasPrimaryTranscription, Literal(str(row['ArmTxt'])))) g.add((coa_reference_node, dho_rep_n.textHasPrimaryTranscription, Literal(str(row['ArmTxt']))))
# CoA identification
# Create node for identified entity
identified_entity = create_identified_entity(row)
if identified_entity is not None:
# Create identification event
identification_event_node = URIRef(dho_rep_n + str(uuid.uuid4()))
g.add((identification_event_node, RDF.type, dho_ent_n.CoAIdentificationAct))
g.add((identification_event_node, RDF.type, OWL.NamedIndividual))
g.add((coa_reference_node, dho_ent_n.hasIdentificationAct, identification_event_node))
g.add((identification_event_node, dho_ent_n.attributesCoATo, identified_entity))
g.add((coa_reference_node, dho_ent_n.hasPrimaryIdentification, identified_entity))
# Link identification to Steen Clemmensen (since data transformation only concerns his coa identifications)
person_uri = URIRef(dho_data_n + 'Steen-Clemmensen')
g.add((identification_event_node, DCTERMS.creator, person_uri))
# Add user comment by Steen Clemmensen
if not pd.isnull(row['AiCmnt']):
g.add((identification_event_node, dho_ent_n.hasAnnotatorComment, Literal(row['AiCmnt'], datatype=XSD.string)))
# VORGEHEN # VORGEHEN
# ArmItem-ID als Legacy übernehmen # ArmItem-ID als Legacy übernehmen
# daraus (vermutlich) eine eindeutige Nachweis-ID machen (Schema dafür dann, wie mit TH besprochen) # daraus (vermutlich) eine eindeutige Nachweis-ID machen (Schema dafür dann, wie mit TH besprochen)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment