Skip to content
Snippets Groups Projects
Commit 980ee5d8 authored by Thorge Petersen's avatar Thorge Petersen
Browse files

Reworked _add_extra_field function to properly insert hvd categories and appl. legislation

parent 7328bb20
No related branches found
No related tags found
1 merge request: !52 "Added applicableLegislation and hvdCategory support"
...@@ -11,14 +11,18 @@ from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCA ...@@ -11,14 +11,18 @@ from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCA
import ckanext.odsh.helpers as helpers_odsh import ckanext.odsh.helpers as helpers_odsh
import ckanext.odsh.collection.helpers as helpers_collection import ckanext.odsh.collection.helpers as helpers_collection
import ast import logging
log = logging.getLogger(__name__)
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/") DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#") DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
DCATAP = rdflib.namespace.Namespace("http://data.europa.eu/r5r/") DCATAP = rdflib.namespace.Namespace("http://data.europa.eu/r5r/")
DCATDE_1_0_2 = rdflib.namespace.Namespace("http://dcat-ap.de/def/dcatde/1.0.2/") DCATDE_1_0_2 = rdflib.namespace.Namespace(
"http://dcat-ap.de/def/dcatde/1.0.2/")
ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#") ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#")
class ODSHDCATdeProfile(DCATdeProfile): class ODSHDCATdeProfile(DCATdeProfile):
# from RDF # from RDF
...@@ -64,12 +68,14 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -64,12 +68,14 @@ class ODSHDCATdeProfile(DCATdeProfile):
def _parse_applicable_legislation(self, dataset_dict, dataset_ref):
    '''
    Reads the applicable legislation of a dataset from the RDF graph and
    stores it in the dataset dictionary under 'applicableLegislation'.

    Parameters:
    - dataset_dict: The dataset dictionary that receives the value.
    - dataset_ref: The reference to the dataset in the RDF graph.

    The property is looked up in the DCATAP namespace
    (http://data.europa.eu/r5r/), which is the namespace this profile uses
    when it writes the property to the graph. The DCT lookup used previously
    is kept as a fallback so that input which was parsed before still is.
    '''
    value = (
        self._object(dataset_ref, DCATAP.applicableLegislation)
        # Legacy lookup: earlier versions only queried the DCT namespace.
        or self._object(dataset_ref, DCT.applicableLegislation)
    )
    if value:
        ds_utils.insert_new_extras_field(
            dataset_dict, 'applicableLegislation', value)
def _parse_hvd_category(self, dataset_dict, dataset_ref):
    '''
    Reads the HVD category of a dataset from the RDF graph and stores it in
    the dataset dictionary under 'hvdCategory'.

    Parameters:
    - dataset_dict: The dataset dictionary that receives the value.
    - dataset_ref: The reference to the dataset in the RDF graph.

    The property is looked up in the DCATAP namespace
    (http://data.europa.eu/r5r/), which is the namespace this profile uses
    when it writes the property to the graph. The DCT lookup used previously
    is kept as a fallback so that input which was parsed before still is.
    '''
    value = (
        self._object(dataset_ref, DCATAP.hvdCategory)
        # Legacy lookup: earlier versions only queried the DCT namespace.
        or self._object(dataset_ref, DCT.hvdCategory)
    )
    if value:
        ds_utils.insert_new_extras_field(
            dataset_dict, 'hvdCategory', value)
def _belongs_to_collection(self, dataset_dict, dataset_ref): def _belongs_to_collection(self, dataset_dict, dataset_ref):
dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf) dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf)
...@@ -94,9 +100,12 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -94,9 +100,12 @@ class ODSHDCATdeProfile(DCATdeProfile):
self._add_license_attribution_by_text(dataset_dict, dataset_ref) self._add_license_attribution_by_text(dataset_dict, dataset_ref)
self._add_type(dataset_dict, dataset_ref) self._add_type(dataset_dict, dataset_ref)
self._add_modified_and_issued(dataset_dict, dataset_ref) self._add_modified_and_issued(dataset_dict, dataset_ref)
self._add_extra_field(dataset_dict, dataset_ref, 'reference', DCT.references) self._add_extra_field(dataset_dict, dataset_ref,
self._add_extra_field(dataset_dict, dataset_ref, 'applicableLegislation', DCATAP.applicableLegislation) 'reference', DCT.references)
self._add_extra_field(dataset_dict, dataset_ref, 'hvdCategory', DCATAP.hvdCategory) self._add_extra_field(dataset_dict, dataset_ref,
'applicableLegislation', DCATAP.applicableLegislation)
self._add_extra_field(dataset_dict, dataset_ref,
'hvdCategory', DCATAP.hvdCategory)
self._add_version(dataset_dict, dataset_ref) self._add_version(dataset_dict, dataset_ref)
if self._is_dataset_collection(dataset_dict): if self._is_dataset_collection(dataset_dict):
self._remove_predefined_collection_members() self._remove_predefined_collection_members()
...@@ -126,29 +135,56 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -126,29 +135,56 @@ class ODSHDCATdeProfile(DCATdeProfile):
rdflib.Literal(licenseAttributionByText)) rdflib.Literal(licenseAttributionByText))
) )
def _add_extra_field(self, dataset_dict, dataset_ref, field_name, rdf_property):
    '''
    Adds the URI value(s) of a dataset field to the RDF graph.

    Parameters:
    - dataset_dict: A dictionary containing dataset information.
    - dataset_ref: The reference to the dataset in the RDF graph.
    - field_name: The key of the field to read from the dataset dictionary.
    - rdf_property: The RDF property the field value(s) are attached to.

    The field value may be
    - a real collection (list, tuple, set) of values, or
    - a string holding one value, or several comma separated values that
      are optionally enclosed in curly braces, e.g. "{http://a, http://b}".

    Surrounding whitespace and quotes are removed from every value and
    empty entries are dropped. If nothing remains, the graph is left
    untouched. A single value replaces any existing value of the property
    (Graph.set); several values are each added to the graph (Graph.add).

    Note: The string form is split on every comma, so a URI that itself
    contains a comma cannot be represented in it.
    '''
    raw_value = dataset_dict.get(field_name)
    if not raw_value:
        return

    if isinstance(raw_value, (list, tuple, set, frozenset)):
        candidates = list(raw_value)
    else:
        # Workaround for lists that were flattened into a single string:
        # drop the enclosing curly braces and split on commas.
        candidates = raw_value.strip('{}').split(',')

    # Quotes are stripped as well, so that values which were stored as the
    # text of a Python set literal ("{'a', 'b'}") yield clean URIs.
    cleaned = (candidate.strip().strip('\'"') for candidate in candidates)
    uris = [uri for uri in cleaned if uri]
    if not uris:
        # Nothing usable (e.g. "{}" or ","): do not emit empty URIs.
        return

    if len(uris) == 1:
        self.g.set(
            (dataset_ref, rdf_property, rdflib.URIRef(uris[0]))
        )
        return

    for uri in uris:
        self.g.add(
            (dataset_ref, rdf_property, rdflib.URIRef(uri))
        )
def _add_modified_and_issued(self, dataset_dict, dataset_ref): def _add_modified_and_issued(self, dataset_dict, dataset_ref):
''' '''
...@@ -176,7 +212,8 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -176,7 +212,8 @@ class ODSHDCATdeProfile(DCATdeProfile):
''' '''
Adds CKAN isReplacedBy extra field to dublin core isReplacedBy and sets version information. Adds CKAN isReplacedBy extra field to dublin core isReplacedBy and sets version information.
''' '''
new_version = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'is_replaced_by') new_version = helpers_odsh.odsh_extract_value_from_extras(
dataset_dict.get('extras'), 'is_replaced_by')
if new_version: if new_version:
self.g.set( self.g.set(
(dataset_ref, DCTERMS.isReplacedBy, (dataset_ref, DCTERMS.isReplacedBy,
...@@ -184,7 +221,8 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -184,7 +221,8 @@ class ODSHDCATdeProfile(DCATdeProfile):
) )
) )
version_notes = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'version_notes') version_notes = helpers_odsh.odsh_extract_value_from_extras(
dataset_dict.get('extras'), 'version_notes')
if version_notes: if version_notes:
self.g.set( self.g.set(
(dataset_ref, ADMS.versionNotes, (dataset_ref, ADMS.versionNotes,
...@@ -194,8 +232,7 @@ class ODSHDCATdeProfile(DCATdeProfile): ...@@ -194,8 +232,7 @@ class ODSHDCATdeProfile(DCATdeProfile):
def _add_type(self, dataset_dict, dataset_ref): def _add_type(self, dataset_dict, dataset_ref):
''' '''
adds the type if there is a known mapping from ckan type to Adds the type if there is a known mapping from ckan type to dct:type
dct:type
''' '''
ckan_type = self._get_ckan_type(dataset_dict) ckan_type = self._get_ckan_type(dataset_dict)
dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type) dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment