diff --git a/ckanext/odsh/profiles/odsh_dcat_de_profile.py b/ckanext/odsh/profiles/odsh_dcat_de_profile.py index 14ef3ef5ace7c9371020cbdeb9ef83abad289ca7..5ccf29f3f1cba410f7b8b3b42afbda178595fda1 100644 --- a/ckanext/odsh/profiles/odsh_dcat_de_profile.py +++ b/ckanext/odsh/profiles/odsh_dcat_de_profile.py @@ -11,14 +11,18 @@ from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCA import ckanext.odsh.helpers as helpers_odsh import ckanext.odsh.collection.helpers as helpers_collection -import ast +import logging + +log = logging.getLogger(__name__) DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/") DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#") DCATAP = rdflib.namespace.Namespace("http://data.europa.eu/r5r/") -DCATDE_1_0_2 = rdflib.namespace.Namespace("http://dcat-ap.de/def/dcatde/1.0.2/") +DCATDE_1_0_2 = rdflib.namespace.Namespace( + "http://dcat-ap.de/def/dcatde/1.0.2/") ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#") + class ODSHDCATdeProfile(DCATdeProfile): # from RDF @@ -32,7 +36,7 @@ class ODSHDCATdeProfile(DCATdeProfile): self._parse_references(dataset_dict, dataset_ref) self._parse_applicable_legislation(dataset_dict, dataset_ref) self._parse_hvd_category(dataset_dict, dataset_ref) - + if self._belongs_to_collection(dataset_dict, dataset_ref): self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref) return dataset_dict @@ -60,16 +64,18 @@ class ODSHDCATdeProfile(DCATdeProfile): value = self._object(dataset_ref, DCT.references) if value: ds_utils.insert_new_extras_field(dataset_dict, 'reference', value) - + def _parse_applicable_legislation(self, dataset_dict, dataset_ref): value = self._object(dataset_ref, DCT.applicableLegislation) if value: - ds_utils.insert_new_extras_field(dataset_dict, 'applicableLegislation', value) + ds_utils.insert_new_extras_field( + dataset_dict, 'applicableLegislation', value) def _parse_hvd_category(self, dataset_dict, dataset_ref): value = self._object(dataset_ref, DCT.hvdCategory) if value: - ds_utils.insert_new_extras_field(dataset_dict, 'hvdCategory', value) + ds_utils.insert_new_extras_field( + dataset_dict, 'hvdCategory', value) def _belongs_to_collection(self, dataset_dict, dataset_ref): dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf) @@ -94,9 +100,12 @@ class ODSHDCATdeProfile(DCATdeProfile): self._add_license_attribution_by_text(dataset_dict, dataset_ref) self._add_type(dataset_dict, dataset_ref) self._add_modified_and_issued(dataset_dict, dataset_ref) - self._add_extra_field(dataset_dict, dataset_ref, 'reference', DCT.references) - self._add_extra_field(dataset_dict, dataset_ref, 'applicableLegislation', DCATAP.applicableLegislation) - self._add_extra_field(dataset_dict, dataset_ref, 'hvdCategory', DCATAP.hvdCategory) + self._add_extra_field(dataset_dict, dataset_ref, + 'reference', DCT.references) + self._add_extra_field(dataset_dict, dataset_ref, + 'applicableLegislation', DCATAP.applicableLegislation) + self._add_extra_field(dataset_dict, dataset_ref, + 'hvdCategory', DCATAP.hvdCategory) self._add_version(dataset_dict, dataset_ref) if self._is_dataset_collection(dataset_dict): self._remove_predefined_collection_members() @@ -126,30 +135,57 @@ class ODSHDCATdeProfile(DCATdeProfile): rdflib.Literal(licenseAttributionByText)) ) - def _add_extra_field(self, dataset_dict, dataset_ref, field_name, dcat_property): - field_value_str = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), field_name) + def _add_extra_field(self, dataset_dict, dataset_ref, field_name, rdf_property): + ''' + Adds extra fields from a dataset dictionary to a RDF graph. + + Parameters: + - dataset_dict: A dictionary containing dataset information. + - dataset_ref: The reference to the dataset in the RDF graph. + - field_name: The name of the field to extract from the dataset dictionary. + - rdf_property: The DCAT property to which the field value corresponds. + + This function extracts the value of the specified field from the dataset dictionary, + and adds it to the RDF graph with the given dataset reference and DCAT property. + If the field value is a list of values represented as a string enclosed in curly braces, + it splits the string and adds each value individually to the graph. + If the field value is a single value, it is added directly to the graph. + + Note: This function includes a workaround since CKAN's extras do not natively support + lists. It treats lists represented as strings enclosed in curly braces as a single string. + ''' + field_value_str = dataset_dict.get(field_name) + + if not field_value_str: + return try: - # Attempt to parse the string as a Python literal - field_value = ast.literal_eval(field_value_str) - if not isinstance(field_value, set): - # If it's not a set, treat it as a single value - raise ValueError("Not a set") - except (ValueError, SyntaxError): - # If parsing fails, treat the entire string as a single value - field_value = field_value_str + # Remove curly braces from the string + values_string = field_value_str.strip('{}') + + # Split the string by commas + field_values = values_string.split(',') + + # Trim any extra whitespace from each URL + field_values = [value.strip() for value in field_values] - if isinstance(field_value, set): - for value in field_value: + # Check if the field value is a list + if len(field_values) > 1: + for value in field_values: + self.g.add( + (dataset_ref, rdf_property, rdflib.URIRef(value)) + ) + else: + # Treat it as a single value self.g.set( - (dataset_ref, dcat_property, rdflib.URIRef(value)) + (dataset_ref, rdf_property, rdflib.URIRef(field_values[0])) ) - else: + except (ValueError, SyntaxError): + # If parsing fails, treat the entire string as a single value self.g.set( - (dataset_ref, dcat_property, rdflib.URIRef(field_value)) + (dataset_ref, rdf_property, rdflib.URIRef(field_value_str)) ) - def _add_modified_and_issued(self, dataset_dict, dataset_ref): ''' Adds distributions last_modified and created values to @@ -176,26 +212,27 @@ class ODSHDCATdeProfile(DCATdeProfile): ''' Adds CKAN isReplacedBy extra field to dublin core isReplacedBy and sets version information. ''' - new_version = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'is_replaced_by') + new_version = helpers_odsh.odsh_extract_value_from_extras( + dataset_dict.get('extras'), 'is_replaced_by') if new_version: self.g.set( (dataset_ref, DCTERMS.isReplacedBy, rdflib.URIRef(new_version) ) ) - - version_notes = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'version_notes') + + version_notes = helpers_odsh.odsh_extract_value_from_extras( + dataset_dict.get('extras'), 'version_notes') if version_notes: self.g.set( (dataset_ref, ADMS.versionNotes, rdflib.Literal(version_notes) ) ) - + def _add_type(self, dataset_dict, dataset_ref): ''' - adds the type if there is a known mapping from ckan type to - dct:type + Adds the type if there is a known mapping from ckan type to dct:type ''' ckan_type = self._get_ckan_type(dataset_dict) dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type)