From aa86f2bb0f87326eb82654a075005b5956a9af32 Mon Sep 17 00:00:00 2001 From: Philipp Schneider <schneider.philipp@uni-muenster.de> Date: Wed, 10 Aug 2022 17:57:38 +0200 Subject: [PATCH] Bug fixing for updated data and structure --- .../integrate_manuscript_metadata_into_kg.py | 7 ++++++- src/rdf-mappings/map-tblBranch.py | 10 +++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/rdf-mappings/integrate_manuscript_metadata_into_kg.py b/src/rdf-mappings/integrate_manuscript_metadata_into_kg.py index a9b3035..9158a26 100644 --- a/src/rdf-mappings/integrate_manuscript_metadata_into_kg.py +++ b/src/rdf-mappings/integrate_manuscript_metadata_into_kg.py @@ -92,6 +92,9 @@ if __name__ == '__main__': if not pd.isnull(row['grid-system']): g.add((manuscript_uri, dho_obj_n.hasGridSystem, Literal(row['grid-system'], datatype=XSD.string))) + # Serialize object graph + functions.serialize_graph(g, output_files) + # Integrate knowledge graph on heraldic data print('Loading Knowledge Graph file...') g.parse(kg_file) @@ -109,7 +112,9 @@ if __name__ == '__main__': g.add((subject, dho_rep_n.partOfObject, manuscript_uri)) # Serialize the prepared graph - functions.serialize_graph(g, output_files) + print('Serialilzing representation graph...') + g.serialize(destination='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.ttl', format='ttl') + g.serialize(destination='data/rdf-output/research-dataset/knowledge-graph-representation-research-dataset.jsonld', format='json-ld') diff --git a/src/rdf-mappings/map-tblBranch.py b/src/rdf-mappings/map-tblBranch.py index 2258a5c..aa77a9c 100644 --- a/src/rdf-mappings/map-tblBranch.py +++ b/src/rdf-mappings/map-tblBranch.py @@ -244,6 +244,9 @@ def add_layer(layer_number, layer_content, field_node, previous_layer_node=None, if charge + '_Num' in df_tblBranch.columns and not pd.isnull(row[charge + '_Num']): if row[charge + '_Num'] == 'Multiple': g.add((charge_node, dho_n.hasNumber, dho_n.Multiple)) + elif row[charge + '_Num'] == 'pair of': + number_uri = create_owl_class_from_string('pair of') + g.add((charge_node, dho_n.hasNumber, number_uri)) else: g.add((charge_node, dho_n.hasNumber, Literal(int(row[charge + '_Num'])))) @@ -382,9 +385,6 @@ if mapping_config['create_research_dataset'] == False: df_tblBranch = df_tblBranch[(df_tblBranch['checked_for_publication'] == "true") | (df_tblBranch['checked_for_publication'] == 'secondary-description')] df_tblBranch = df_tblBranch[pd.isnull(df_tblBranch['published_as_rdf'])] -# Filter all entries that cannot yet be used for RDF transformation and must therefore be skipped -df_tblBranch = df_tblBranch[(df_tblBranch['checkBlasonIndividually'] == "true")] - print(df_tblBranch) # Initialize rdflib graph object @@ -408,6 +408,10 @@ if mapping_config['add_metadata'] == True: # Iterate through CSV to build graph for index, row in df_tblBranch.iterrows(): + # Filter all entries that cannot yet be used for RDF transformation and must therefore be skipped + if row['checkBlasonIndividually'] == 'true' or row['checkBlasonIndividually'] == True: + continue + # New coa description branch_id = str(row['BranchID']) -- GitLab