Newer
Older
from rdflib.namespace import DCTERMS
from ckan.common import config
import ckan.lib.helpers as helpers
import ckan.model as model
from ckanext.dcat.profiles import DCT
from ckanext.dcat.utils import resource_uri
import ckanext.dcatde.dataset_utils as ds_utils
from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCATDE_1_0_1
import ckanext.odsh.helpers as helpers_odsh
import ckanext.odsh.collection.helpers as helpers_collection
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
DCATDE_1_0_2 = rdflib.namespace.Namespace("http://dcat-ap.de/def/dcatde/1.0.2/")
ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#")
class ODSHDCATdeProfile(DCATdeProfile):
# from RDF
def parse_dataset(self, dataset_dict, dataset_ref):
dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
dataset_dict, dataset_ref
)
self._parse_distributions(dataset_dict, dataset_ref)
self._parse_type(dataset_dict, dataset_ref)
self._parse_references(dataset_dict, dataset_ref)
self._parse_applicable_legislation(dataset_dict, dataset_ref)
self._parse_hvd_category(dataset_dict, dataset_ref)
if self._belongs_to_collection(dataset_dict, dataset_ref):
self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)
return dataset_dict

Thorge Petersen
committed
def _parse_distributions(self, dataset_dict, dataset_ref):
for distribution in self.g.objects(dataset_ref, DCAT.distribution):
for resource_dict in dataset_dict.get('resources', []):
# Match distribution in graph and distribution in ckan-dict
if str(distribution) == resource_uri(resource_dict):
for namespace in [DCATDE, DCATDE_1_0, DCATDE_1_0_1, DCATDE_1_0_2]:
value = self._object_value(
distribution, namespace.licenseAttributionByText)
if value:
ds_utils.insert_new_extras_field(
dataset_dict, 'licenseAttributionByText', value)
return

Thorge Petersen
committed
def _parse_type(self, dataset_dict, dataset_ref):
dct_type = self._object(dataset_ref, DCT.type)
if dct_type:
ckan_type = helpers_odsh.map_dct_type_to_ckan_type(str(dct_type))

Thorge Petersen
committed
def _parse_references(self, dataset_dict, dataset_ref):
value = self._object(dataset_ref, DCT.references)
if value:
ds_utils.insert_new_extras_field(dataset_dict, 'reference', value)
def _parse_applicable_legislation(self, dataset_dict, dataset_ref):
value = self._object(dataset_ref, DCT.applicableLegislation)
if value:
ds_utils.insert_new_extras_field(dataset_dict, 'applicableLegislation', value)
def _parse_hvd_category(self, dataset_dict, dataset_ref):
value = self._object(dataset_ref, DCT.hvdCategory)
if value:
ds_utils.insert_new_extras_field(dataset_dict, 'hvdCategory', value)
def _belongs_to_collection(self, dataset_dict, dataset_ref):
dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf)
belongs_to_collection = True if dct_is_version_of else False
return belongs_to_collection

Thorge Petersen
committed
def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
dataset_dict.update({'add_to_collection': True})

Thorge Petersen
committed
# to RDF
def graph_from_dataset(self, dataset_dict, dataset_ref):
'''
this class inherits from ODSHDCATdeProfile
it has been extended to add information to
the rdf export
'''
super(ODSHDCATdeProfile, self).graph_from_dataset(
dataset_dict, dataset_ref)
self._add_contributor_id(dataset_dict, dataset_ref)
self._add_license_attribution_by_text(dataset_dict, dataset_ref)
self._add_type(dataset_dict, dataset_ref)

Thorge Petersen
committed
self._add_modified_and_issued(dataset_dict, dataset_ref)
self._add_extra_field(dataset_dict, dataset_ref, 'reference', DCT.references)
self._add_extra_field(dataset_dict, dataset_ref, 'applicableLegislation', DCAT.applicableLegislation)
self._add_extra_field(dataset_dict, dataset_ref, 'hvdCategory', DCAT.hvdCategory)
self._add_version(dataset_dict, dataset_ref)
if self._is_dataset_collection(dataset_dict):
self._remove_predefined_collection_members()
self._add_collection_members(dataset_dict, dataset_ref)
if self._dataset_belongs_to_collection(dataset_dict):
self._add_collection(dataset_dict, dataset_ref)

Thorge Petersen
committed
def _add_contributor_id(self, dataset_dict, dataset_ref):
contributorID = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
self.g.add(

Thorge Petersen
committed
(dataset_ref, DCATDE.contributorID,

Thorge Petersen
committed
)

Thorge Petersen
committed
def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):

Thorge Petersen
committed
licenseAttributionByText = self._get_dataset_value(
dataset_dict, 'licenseAttributionByText')

Thorge Petersen
committed
(dataset_ref, DCATDE.licenseAttributionByText,
rdflib.Literal(licenseAttributionByText))
)
for distribution in self.g.objects(dataset_ref, DCAT.distribution):
self.g.set(

Thorge Petersen
committed
(distribution, DCATDE.licenseAttributionByText,
rdflib.Literal(licenseAttributionByText))

Thorge Petersen
committed
def _add_extra_field(self, dataset_dict, dataset_ref, field_name, dcat_property):
uri = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), field_name)
if uri:

Thorge Petersen
committed
self.g.set(
(dataset_ref, dcat_property, rdflib.URIRef(uri))

Thorge Petersen
committed
)

Thorge Petersen
committed

Thorge Petersen
committed
def _add_modified_and_issued(self, dataset_dict, dataset_ref):
'''
Adds distributions last_modified and created values to
dcat:modified and dcat:issued.
'''
for distribution in self.g.objects(dataset_ref, DCAT.distribution):
for resource_dict in dataset_dict.get('resources', []):
# Match distribution in graph and distribution in ckan-dict
if str(distribution) == resource_uri(resource_dict):

Thorge Petersen
committed
last_modified = resource_dict.get('last_modified', None)
if last_modified:
self.g.set(
(distribution, DCT.modified, rdflib.Literal(
last_modified, datatype="http://www.w3.org/2001/XMLSchema#dateTime"))
)
created = resource_dict.get('created', None)
if created:
self.g.set(
(distribution, DCT.issued, rdflib.Literal(
created, datatype="http://www.w3.org/2001/XMLSchema#dateTime"))
)
def _add_version(self, dataset_dict, dataset_ref):
'''
Adds CKAN isReplacedBy extra field to dublin core isReplacedBy and sets version information.
'''
new_version = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'is_replaced_by')
if new_version:
self.g.set(
(dataset_ref, DCTERMS.isReplacedBy,
rdflib.URIRef(new_version)
)
)
version_notes = helpers_odsh.odsh_extract_value_from_extras(dataset_dict.get('extras'), 'version_notes')
if version_notes:
self.g.set(
(dataset_ref, ADMS.versionNotes,
rdflib.Literal(version_notes)
)
)
def _add_type(self, dataset_dict, dataset_ref):
'''
adds the type if there is a known mapping from ckan type to
dct:type
'''
ckan_type = self._get_ckan_type(dataset_dict)
dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type)

Thorge Petersen
committed
(dataset_ref, DCT.type,

Thorge Petersen
committed
)

Thorge Petersen
committed
def _get_ckan_type(self, dataset_dict):
ckan_type = self._get_dataset_value(dataset_dict, 'type')
return ckan_type

Thorge Petersen
committed
def _remove_predefined_collection_members(self):
for s, p, o in self.g:

Thorge Petersen
committed
if p == DCT.hasVersion:

Thorge Petersen
committed
def _add_collection_members(self, dataset_dict, dataset_ref):

Thorge Petersen
committed
dataset_refs_belonging_to_collection = self._get_dataset_refs_belonging_to_collection(
dataset_dict)
for ref in dataset_refs_belonging_to_collection:
self.g.add(
(dataset_ref, DCT.hasVersion, rdflib.URIRef(ref))
)

Thorge Petersen
committed
def _is_dataset_collection(self, dataset_dict):
ckan_type = self._get_ckan_type(dataset_dict)

Thorge Petersen
committed
is_collection = ckan_type == 'collection'

Thorge Petersen
committed
def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
dataset_names = helpers_collection.get_dataset_names(dataset_dict)
dataset_refs = [self._construct_refs(name) for name in dataset_names]

Thorge Petersen
committed
@staticmethod
def _construct_refs(id):
public_url = config.get('ckan.site_url')
url_to_id = helpers.url_for('dataset.read', id=id)

Thorge Petersen
committed
def _dataset_belongs_to_collection(self, dataset_dict):
'''
returns True if a containing collection is found
'''

Thorge Petersen
committed
if dataset_dict.get('type') == 'collection':
collection_name = helpers_collection.get_collection_id(dataset_dict)

Thorge Petersen
committed
return collection_name is not None
def _add_collection(self, dataset_dict, dataset_ref):
collection_id = helpers_collection.get_collection_id(dataset_dict)
collection_uri = self._construct_refs(collection_id)
self.g.set(

Thorge Petersen
committed
(dataset_ref, DCT.isVersionOf,

Thorge Petersen
committed
)