# RDF profile module: defines ODSHDCATdeProfile, an extension of ckanext-dcatde's DCATdeProfile.
import rdflib
from rdflib.namespace import DCTERMS

from ckan.common import config
import ckan.lib.helpers as helpers
import ckan.model as model
from ckanext.dcat.profiles import DCT
from ckanext.dcat.utils import resource_uri
import ckanext.dcatde.dataset_utils as ds_utils
from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCATDE_1_0_1

import ckanext.odsh.helpers as helpers_odsh
import ckanext.odsh.collection.helpers as helpers_collection

# Module-level logger and RDF namespaces
import logging

log = logging.getLogger(__name__)

# RDF namespaces used by the profile below.
# DCT and DCAT deliberately re-bind the names imported from ckanext.dcat /
# ckanext.dcatde at the top of the file.
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
# DCATAP is referenced by the profile (applicableLegislation, hvdCategory)
# but was not defined anywhere in this module.
# NOTE(review): the URI is the published DCAT-AP namespace - confirm it
# matches the definition used upstream.
DCATAP = rdflib.namespace.Namespace("http://data.europa.eu/r5r/")
DCATDE_1_0_2 = rdflib.namespace.Namespace(
    "http://dcat-ap.de/def/dcatde/1.0.2/")
ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#")

# DCAT-AP.de profile with ODSH-specific extensions
class ODSHDCATdeProfile(DCATdeProfile):
    '''
    DCAT-AP.de profile with ODSH-specific additions.

    Extends ``DCATdeProfile`` in both directions: ``parse_dataset`` copies
    additional values from the RDF graph into the CKAN dataset dict, and
    ``graph_from_dataset`` adds additional triples to the exported graph.

    Statements marked ``NOTE(review)`` were missing from the damaged source
    this class was restored from and have been reconstructed from the
    surrounding code; confirm them against the upstream repository.
    '''

    # from RDF

    def parse_dataset(self, dataset_dict, dataset_ref):
        '''
        Fill ``dataset_dict`` from the graph node ``dataset_ref``.

        Runs the parent profile first, then the ODSH-specific parsers, and
        finally flags the dataset if it is a version of another dataset.

        Returns the dataset dict returned by the parent profile, updated in
        place by the additional parsers.
        '''
        dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
            dataset_dict, dataset_ref
        )
        self._parse_distributions(dataset_dict, dataset_ref)
        self._parse_type(dataset_dict, dataset_ref)
        self._parse_political_geocoding_uri(dataset_dict, dataset_ref)
        self._parse_references(dataset_dict, dataset_ref)
        self._parse_applicable_legislation(dataset_dict, dataset_ref)
        self._parse_hvd_category(dataset_dict, dataset_ref)
        if self._belongs_to_collection(dataset_dict, dataset_ref):
            self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)
        return dataset_dict

    def _parse_distributions(self, dataset_dict, dataset_ref):
        '''
        Copy the first ``licenseAttributionByText`` found on a distribution
        into the dataset extras.

        Only distributions whose URI matches a resource in ``dataset_dict``
        are considered. Every known DCAT-AP.de namespace version is tried;
        the method returns as soon as one value has been stored.
        '''
        namespaces = [DCATDE, DCATDE_1_0, DCATDE_1_0_1, DCATDE_1_0_2]
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            for resource_dict in dataset_dict.get('resources', []):
                # Match distribution in graph and distribution in ckan-dict
                if str(distribution) != resource_uri(resource_dict):
                    continue
                for namespace in namespaces:
                    value = self._object_value(
                        distribution, namespace.licenseAttributionByText)
                    if value:
                        ds_utils.insert_new_extras_field(
                            dataset_dict, 'licenseAttributionByText', value)
                        return

    def _parse_type(self, dataset_dict, dataset_ref):
        '''
        Map the dataset's ``dct:type`` to a CKAN type and store it under
        the ``type`` key of ``dataset_dict``.
        '''
        dct_type = self._object(dataset_ref, DCT.type)
        if dct_type:
            ckan_type = helpers_odsh.map_dct_type_to_ckan_type(str(dct_type))
            # NOTE(review): reconstructed - the statement consuming
            # ckan_type was missing from the source.
            dataset_dict.update({'type': ckan_type})

    def _parse_references(self, dataset_dict, dataset_ref):
        '''Store the dataset's ``dct:references`` object as extra ``reference``.'''
        value = self._object(dataset_ref, DCT.references)
        if value:
            ds_utils.insert_new_extras_field(dataset_dict, 'reference', value)

    def _parse_metadata_list(self, dataset_dict, dataset_ref, field_name, rdf_predicate):
        '''
        Store all objects of ``rdf_predicate`` as one extras field.

        The objects are converted to strings, joined with commas and
        wrapped in curly braces (``{a,b}``); ``_add_extra_field`` reads the
        same format back. Nothing is stored when there are no non-empty
        objects.
        '''
        values = [str(obj) for obj in self.g.objects(dataset_ref, rdf_predicate)]
        joined = ",".join(value for value in values if value)
        if joined:
            ds_utils.insert_new_extras_field(
                dataset_dict, field_name, "{" + joined + "}")

    def _parse_applicable_legislation(self, dataset_dict, dataset_ref):
        '''Store all ``dcatap:applicableLegislation`` objects as an extra.'''
        self._parse_metadata_list(
            dataset_dict, dataset_ref,
            'applicableLegislation', DCATAP.applicableLegislation)

    def _parse_hvd_category(self, dataset_dict, dataset_ref):
        '''Store all ``dcatap:hvdCategory`` objects as an extra.'''
        self._parse_metadata_list(
            dataset_dict, dataset_ref,
            'hvdCategory', DCATAP.hvdCategory)

    def _parse_political_geocoding_uri(self, dataset_dict, dataset_ref):
        """There are datasets that have a politicalGeocodingURI but no spatial_uri.

        In that case the politicalGeocodingURI is stored as ``spatial_uri``.
        """
        extras = dataset_dict.get('extras')
        political_geocoding_uri = helpers_odsh.odsh_extract_value_from_extras(
            extras=extras, key='politicalGeocodingURI')
        spatial_uri = helpers_odsh.odsh_extract_value_from_extras(
            extras=extras, key='spatial_uri')
        if not spatial_uri and political_geocoding_uri:
            if '"' in political_geocoding_uri:
                # The value contains double quotes; keep only the text
                # after the first quote (up to the next one, if any).
                political_geocoding_uri = political_geocoding_uri.split('"')[1]
            ds_utils.insert_new_extras_field(
                dataset_dict, 'spatial_uri', political_geocoding_uri)

    def _belongs_to_collection(self, dataset_dict, dataset_ref):
        '''Return True if the dataset has a ``dct:isVersionOf`` object.'''
        return bool(self._object(dataset_ref, DCT.isVersionOf))

    def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
        '''Set the ``add_to_collection`` flag on ``dataset_dict``.'''
        dataset_dict.update({'add_to_collection': True})

    # to RDF

    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Add the dataset to the RDF graph.

        Runs the parent ``DCATdeProfile`` export first and then adds the
        ODSH-specific information to the graph.
        '''
        super(ODSHDCATdeProfile, self).graph_from_dataset(
            dataset_dict, dataset_ref)
        self._add_contributor_id(dataset_dict, dataset_ref)
        self._add_license_attribution_by_text(dataset_dict, dataset_ref)
        self._add_type(dataset_dict, dataset_ref)
        self._add_modified_and_issued(dataset_dict, dataset_ref)
        self._add_extra_field(dataset_dict, dataset_ref,
                              'reference', DCT.references)
        self._add_extra_field(dataset_dict, dataset_ref,
                              'applicableLegislation', DCATAP.applicableLegislation)
        self._add_extra_field(dataset_dict, dataset_ref,
                              'hvdCategory', DCATAP.hvdCategory)
        self._add_version(dataset_dict, dataset_ref)
        if self._is_dataset_collection(dataset_dict):
            self._remove_predefined_collection_members()
            self._add_collection_members(dataset_dict, dataset_ref)
        if self._dataset_belongs_to_collection(dataset_dict):
            self._add_collection(dataset_dict, dataset_ref)

    def _add_contributor_id(self, dataset_dict, dataset_ref):
        '''Add the fixed Schleswig-Holstein ``dcatde:contributorID``.'''
        contributor_id = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
        # NOTE(review): the object of this triple was missing from the
        # source; URIRef(contributor_id) is reconstructed.
        self.g.add(
            (dataset_ref, DCATDE.contributorID, rdflib.URIRef(contributor_id))
        )

    def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):
        '''
        Set ``dcatde:licenseAttributionByText`` on the dataset and on each
        of its distributions.
        '''
        attribution = self._get_dataset_value(
            dataset_dict, 'licenseAttributionByText')
        # NOTE(review): the guard and the first ``self.g.set(`` were
        # missing from the source and are reconstructed.
        if not attribution:
            return
        self.g.set(
            (dataset_ref, DCATDE.licenseAttributionByText,
             rdflib.Literal(attribution))
        )
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            self.g.set(
                (distribution, DCATDE.licenseAttributionByText,
                 rdflib.Literal(attribution))
            )

    def _add_extra_field(self, dataset_dict, dataset_ref, field_name, rdf_property):
        '''
        Adds extra fields from a dataset dictionary to a RDF graph.

        Parameters:
        - dataset_dict: A dictionary containing dataset information.
        - dataset_ref: The reference to the dataset in the RDF graph.
        - field_name: The name of the field to extract from the dataset dictionary.
        - rdf_property: The RDF property to which the field value corresponds.

        The value is read with ``dataset_dict.get(field_name)``. Surrounding
        curly braces are stripped and the rest is split on commas. If that
        yields several values, each one is added to the graph as a URIRef;
        a single value is set as the only object of the property.

        Note: This is a workaround since CKAN's extras do not natively
        support lists; lists are stored as one string enclosed in curly
        braces.
        '''
        field_value_str = dataset_dict.get(field_name)
        if not field_value_str:
            return

        try:
            # Remove the curly braces, split on commas, trim whitespace.
            field_values = [
                value.strip()
                for value in field_value_str.strip('{}').split(',')
            ]
            if len(field_values) > 1:
                for value in field_values:
                    self.g.add(
                        (dataset_ref, rdf_property, rdflib.URIRef(value))
                    )
            else:
                # Treat it as a single value.
                # NOTE(review): the call wrapping this triple was missing
                # from the source; ``self.g.set`` mirrors the except branch.
                self.g.set(
                    (dataset_ref, rdf_property, rdflib.URIRef(field_values[0]))
                )
        except (ValueError, SyntaxError):
            # If parsing fails, treat the entire string as a single value
            self.g.set(
                (dataset_ref, rdf_property, rdflib.URIRef(field_value_str))
            )

    def _add_modified_and_issued(self, dataset_dict, dataset_ref):
        '''
        Adds the resources' last_modified and created values to the
        matching distributions as dct:modified and dct:issued.
        '''
        xsd_datetime = "http://www.w3.org/2001/XMLSchema#dateTime"
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            for resource_dict in dataset_dict.get('resources', []):
                # Match distribution in graph and distribution in ckan-dict
                if str(distribution) != resource_uri(resource_dict):
                    continue
                last_modified = resource_dict.get('last_modified', None)
                if last_modified:
                    self.g.set(
                        (distribution, DCT.modified, rdflib.Literal(
                            last_modified, datatype=xsd_datetime))
                    )
                created = resource_dict.get('created', None)
                if created:
                    self.g.set(
                        (distribution, DCT.issued, rdflib.Literal(
                            created, datatype=xsd_datetime))
                    )

    def _add_version(self, dataset_dict, dataset_ref):
        '''
        Adds the CKAN ``is_replaced_by`` field as dct:isReplacedBy and the
        ``version_notes`` field as adms:versionNotes.
        '''
        new_version = dataset_dict.get('is_replaced_by')
        if new_version:
            self.g.set(
                (dataset_ref, DCTERMS.isReplacedBy, rdflib.URIRef(new_version))
            )

        version_notes = dataset_dict.get('version_notes')
        if version_notes:
            self.g.set(
                (dataset_ref, ADMS.versionNotes, rdflib.Literal(version_notes))
            )

    def _add_type(self, dataset_dict, dataset_ref):
        '''
        Adds the type if there is a known mapping from ckan type to dct:type
        '''
        # NOTE(review): the ckan_type lookup, the guard and the triple's
        # object were missing from the source and are reconstructed.
        ckan_type = self._get_ckan_type(dataset_dict)
        dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type)
        if dct_type:
            self.g.set(
                (dataset_ref, DCT.type, rdflib.URIRef(dct_type))
            )

    def _get_ckan_type(self, dataset_dict):
        '''Return the dataset value stored under ``type``.'''
        return self._get_dataset_value(dataset_dict, 'type')

    def _remove_predefined_collection_members(self):
        '''Remove every ``dct:hasVersion`` triple from the graph.'''
        # NOTE(review): the loop body was missing from the source. A single
        # wildcard removal is used so the graph is not mutated while it is
        # being iterated.
        self.g.remove((None, DCT.hasVersion, None))

    def _add_collection_members(self, dataset_dict, dataset_ref):
        '''Add a ``dct:hasVersion`` triple for each dataset of the collection.'''
        member_refs = self._get_dataset_refs_belonging_to_collection(
            dataset_dict)
        for ref in member_refs:
            self.g.add(
                (dataset_ref, DCT.hasVersion, rdflib.URIRef(ref))
            )

    def _is_dataset_collection(self, dataset_dict):
        '''Return True if the dataset's CKAN type is ``collection``.'''
        ckan_type = self._get_ckan_type(dataset_dict)
        # NOTE(review): the return statement was missing from the source.
        return ckan_type == 'collection'

    def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
        '''Return the constructed refs of all datasets named by the collection.'''
        dataset_names = helpers_collection.get_dataset_names(dataset_dict)
        # NOTE(review): the return statement was missing from the source.
        return [self._construct_refs(name) for name in dataset_names]

    @staticmethod
    def _construct_refs(id):
        '''
        Build a dataset URL from ``ckan.site_url`` and the ``dataset.read``
        route for ``id``.
        '''
        public_url = config.get('ckan.site_url')
        url_to_id = helpers.url_for('dataset.read', id=id)
        # NOTE(review): the return statement was missing from the source;
        # concatenating both parts is reconstructed.
        return public_url + url_to_id

    def _dataset_belongs_to_collection(self, dataset_dict):
        '''
        returns True if a containing collection is found
        '''
        if dataset_dict.get('type') == 'collection':
            # NOTE(review): reconstructed - a collection is assumed not to
            # belong to another collection.
            return False
        collection_name = helpers_collection.get_collection_id(dataset_dict)
        return collection_name is not None

    def _add_collection(self, dataset_dict, dataset_ref):
        '''Set ``dct:isVersionOf`` to the URI of the containing collection.'''
        collection_id = helpers_collection.get_collection_id(dataset_dict)
        collection_uri = self._construct_refs(collection_id)
        # NOTE(review): the object of this triple was missing from the
        # source; URIRef(collection_uri) is reconstructed.
        self.g.set(
            (dataset_ref, DCT.isVersionOf, rdflib.URIRef(collection_uri))
        )