Skip to content
Snippets Groups Projects
odsh_dcat_de_profile.py 10.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • root's avatar
    root committed
    import rdflib
    
    from rdflib.namespace import DCTERMS
    
    root's avatar
    root committed
    
    from ckan.common import config
    import ckan.lib.helpers as helpers
    import ckan.model as model
    from ckanext.dcat.profiles import DCT
    from ckanext.dcat.utils import resource_uri
    import ckanext.dcatde.dataset_utils as ds_utils
    
    Thorge Petersen's avatar
    Thorge Petersen committed
    from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCATDE_1_0_1
    
    root's avatar
    root committed
    
    
    import ckanext.odsh.helpers as helpers_odsh
    
    root's avatar
    root committed
    import ckanext.odsh.collection.helpers as helpers_collection
    
    
    # RDF namespaces used by this profile.
    # NOTE: both assignments rebind names that are already imported above
    # (DCT from ckanext.dcat.profiles, DCAT from ckanext.dcatde.profiles).
    DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
    DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
    
    Thorge Petersen's avatar
    Thorge Petersen committed
    # DCAT-AP.de 1.0.2 namespace; consulted together with the DCATDE namespaces
    # imported from ckanext.dcatde.profiles when parsing distributions.
    DCATDE_1_0_2 = rdflib.namespace.Namespace("http://dcat-ap.de/def/dcatde/1.0.2/")

    # ADMS namespace; used for adms:versionNotes in the RDF export.
    ADMS = rdflib.namespace.Namespace("http://www.w3.org/ns/adms#")
    
    root's avatar
    root committed
    
    class ODSHDCATdeProfile(DCATdeProfile):
    
        # from RDF
    
        def parse_dataset(self, dataset_dict, dataset_ref):
            '''
            Fill the CKAN dataset dict from the RDF graph.

            The parent profile parses first; afterwards the ODSH specific
            parsers run in a fixed order. If ``_belongs_to_collection``
            reports a collection link, the dict is marked so that the dataset
            gets added to a CKAN collection.

            :param dataset_dict: CKAN dataset dict that is being built
            :param dataset_ref: graph node of the dataset
            :returns: the dataset dict
            '''
            dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
                dataset_dict, dataset_ref)

            odsh_parsers = (
                self._parse_distributions,
                self._parse_type,
                self._parse_references,
                self._parse_applicable_legislation,
                self._parse_hvd_category,
            )
            for parse in odsh_parsers:
                parse(dataset_dict, dataset_ref)

            if self._belongs_to_collection(dataset_dict, dataset_ref):
                self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)

            return dataset_dict
    
    root's avatar
    root committed
        def _parse_distributions(self, dataset_dict, dataset_ref):
            '''
            Copy ``licenseAttributionByText`` from a distribution into the
            dataset extras.

            Every distribution of the dataset in the graph is compared by URI
            with the resources of the CKAN dict. For a matching pair the
            property is looked up in each known DCAT-AP.de namespace; the first
            non-empty value is stored and the search ends.
            '''
            predicates = [
                namespace.licenseAttributionByText
                for namespace in (DCATDE, DCATDE_1_0, DCATDE_1_0_1, DCATDE_1_0_2)
            ]
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                for resource_dict in dataset_dict.get('resources', []):
                    # Only the resource that represents this distribution counts
                    if str(distribution) != resource_uri(resource_dict):
                        continue
                    for predicate in predicates:
                        text = self._object_value(distribution, predicate)
                        if text:
                            ds_utils.insert_new_extras_field(
                                dataset_dict, 'licenseAttributionByText', text)
                            return
    
    root's avatar
    root committed
        def _parse_type(self, dataset_dict, dataset_ref):
            '''
            Set the CKAN ``type`` of the dataset from its dct:type, if the graph
            provides one. The value is translated with
            ``helpers_odsh.map_dct_type_to_ckan_type``.
            '''
            type_node = self._object(dataset_ref, DCT.type)
            if not type_node:
                return
            dataset_dict['type'] = helpers_odsh.map_dct_type_to_ckan_type(
                str(type_node))
    
        def _parse_references(self, dataset_dict, dataset_ref):
            '''
            Store the dataset's dct:references object, if present, in the CKAN
            extras field ``reference``.
            '''
            reference = self._object(dataset_ref, DCT.references)
            if not reference:
                return
            ds_utils.insert_new_extras_field(dataset_dict, 'reference', reference)
    
        
        def _parse_applicable_legislation(self, dataset_dict, dataset_ref):
            '''
            Store the dataset's applicable legislation in the CKAN extras field
            ``applicableLegislation``.

            The property is defined by DCAT-AP in the namespace
            http://data.europa.eu/r5r/, so that namespace is checked first. The
            DCAT and DCT variants are accepted as fallbacks for graphs that use
            those namespaces. The first non-empty value wins.
            '''
            dcatap = rdflib.namespace.Namespace("http://data.europa.eu/r5r/")
            candidates = (
                dcatap.applicableLegislation,
                DCAT.applicableLegislation,
                DCT.applicableLegislation,
            )
            for predicate in candidates:
                value = self._object(dataset_ref, predicate)
                if value:
                    ds_utils.insert_new_extras_field(
                        dataset_dict, 'applicableLegislation', value)
                    return
    
        def _parse_hvd_category(self, dataset_dict, dataset_ref):
            '''
            Store the dataset's high-value-dataset category in the CKAN extras
            field ``hvdCategory``.

            The property is defined by DCAT-AP in the namespace
            http://data.europa.eu/r5r/, so that namespace is checked first. The
            DCAT and DCT variants are accepted as fallbacks for graphs that use
            those namespaces. The first non-empty value wins.
            '''
            dcatap = rdflib.namespace.Namespace("http://data.europa.eu/r5r/")
            candidates = (
                dcatap.hvdCategory,
                DCAT.hvdCategory,
                DCT.hvdCategory,
            )
            for predicate in candidates:
                value = self._object(dataset_ref, predicate)
                if value:
                    ds_utils.insert_new_extras_field(
                        dataset_dict, 'hvdCategory', value)
                    return
    
    root's avatar
    root committed
        def _belongs_to_collection(self, dataset_dict, dataset_ref):
            '''
            Return True if the dataset has a dct:isVersionOf object in the
            graph, False otherwise.
            '''
            return bool(self._object(dataset_ref, DCT.isVersionOf))
    
    root's avatar
    root committed
        def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
            dataset_dict.update({'add_to_collection': True})
    
    
    root's avatar
    root committed
        def graph_from_dataset(self, dataset_dict, dataset_ref):
            '''
            Write the CKAN dataset dict into the RDF graph.

            This overrides the method of the parent profile: the parent
            serialization runs first and the ODSH specific information is added
            to the graph afterwards.

            :param dataset_dict: CKAN dataset dict to export
            :param dataset_ref: graph node of the dataset
            '''
            super(ODSHDCATdeProfile, self).graph_from_dataset(
                dataset_dict, dataset_ref)
            self._add_contributor_id(dataset_dict, dataset_ref)
            self._add_license_attribution_by_text(dataset_dict, dataset_ref)
            self._add_type(dataset_dict, dataset_ref)

            self._add_modified_and_issued(dataset_dict, dataset_ref)

            # applicableLegislation and hvdCategory are DCAT-AP terms
            # (namespace http://data.europa.eu/r5r/); the W3C DCAT vocabulary
            # does not define them.
            dcatap = rdflib.namespace.Namespace("http://data.europa.eu/r5r/")
            self._add_extra_field(
                dataset_dict, dataset_ref, 'reference', DCT.references)
            self._add_extra_field(
                dataset_dict, dataset_ref, 'applicableLegislation',
                dcatap.applicableLegislation)
            self._add_extra_field(
                dataset_dict, dataset_ref, 'hvdCategory', dcatap.hvdCategory)

            self._add_version(dataset_dict, dataset_ref)

            if self._is_dataset_collection(dataset_dict):
                # Replace collection members written earlier with the members
                # computed by this profile.
                self._remove_predefined_collection_members()
                self._add_collection_members(dataset_dict, dataset_ref)
            if self._dataset_belongs_to_collection(dataset_dict):
                self._add_collection(dataset_dict, dataset_ref)
    
    root's avatar
    root committed
        def _add_contributor_id(self, dataset_dict, dataset_ref):
            '''
            Add the fixed Schleswig-Holstein contributor id to the dataset as
            dcatde:contributorID. ``dataset_dict`` is not used.
            '''
            contributorID = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
            # Graph.add expects a complete (subject, predicate, object) triple.
            self.g.add(
                (dataset_ref, DCATDE.contributorID,
                 rdflib.URIRef(contributorID))
            )
    
    root's avatar
    root committed
        def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):
            '''
            Export the dataset value ``licenseAttributionByText`` as
            dcatde:licenseAttributionByText.

            The literal is set on the dataset itself and on each of its
            distributions in the graph. Nothing is written when the value is
            missing or empty.
            '''
            attribution = self._get_dataset_value(
                dataset_dict, 'licenseAttributionByText')
            if not attribution:
                return

            predicate = DCATDE.licenseAttributionByText
            self.g.set((dataset_ref, predicate, rdflib.Literal(attribution)))
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                self.g.set((distribution, predicate, rdflib.Literal(attribution)))
    
        def _add_extra_field(self, dataset_dict, dataset_ref, field_name, dcat_property):
            '''
            Export one CKAN extras field as a URI valued property of the dataset.

            :param dataset_dict: CKAN dataset dict whose ``extras`` are read
            :param dataset_ref: graph node of the dataset
            :param field_name: key of the extras field to export
            :param dcat_property: RDF predicate to write the value to

            Nothing is written when the extras field yields no value.
            '''
            uri = helpers_odsh.odsh_extract_value_from_extras(
                dataset_dict.get('extras'), field_name)
            if uri:
                # The triple has to be written to the graph; merely building
                # the tuple has no effect.
                self.g.set(
                    (dataset_ref, dcat_property, rdflib.URIRef(uri))
                )
    
        def _add_modified_and_issued(self, dataset_dict, dataset_ref):
            '''
            Export the resource timestamps to the matching distributions.

            For every distribution whose URI equals the URI of a CKAN resource,
            ``last_modified`` is written to dct:modified and ``created`` to
            dct:issued, both typed as xsd:dateTime. Missing or empty timestamps
            are skipped.
            '''
            xsd_datetime = "http://www.w3.org/2001/XMLSchema#dateTime"
            timestamp_fields = (
                ('last_modified', DCT.modified),
                ('created', DCT.issued),
            )
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                for resource_dict in dataset_dict.get('resources', []):
                    # Only the resource that represents this distribution counts
                    if str(distribution) != resource_uri(resource_dict):
                        continue
                    for key, predicate in timestamp_fields:
                        timestamp = resource_dict.get(key, None)
                        if timestamp:
                            self.g.set(
                                (distribution, predicate,
                                 rdflib.Literal(timestamp, datatype=xsd_datetime))
                            )
    
    
        def _add_version(self, dataset_dict, dataset_ref):
            '''
            Export version information stored in the CKAN extras.

            The ``is_replaced_by`` extra is written to dct:isReplacedBy as a URI
            and the ``version_notes`` extra to adms:versionNotes as a literal.
            A field without a value is not exported.
            '''
            extras = dataset_dict.get('extras')

            successor = helpers_odsh.odsh_extract_value_from_extras(
                extras, 'is_replaced_by')
            if successor:
                self.g.set(
                    (dataset_ref, DCTERMS.isReplacedBy, rdflib.URIRef(successor)))

            notes = helpers_odsh.odsh_extract_value_from_extras(
                extras, 'version_notes')
            if notes:
                self.g.set(
                    (dataset_ref, ADMS.versionNotes, rdflib.Literal(notes)))
        
    
    root's avatar
    root committed
        def _add_type(self, dataset_dict, dataset_ref):
            '''
            Add dct:type to the dataset if there is a known mapping from the
            CKAN type to a dct:type.

            The mapping comes from ``helpers_odsh.map_ckan_type_to_dct_type``;
            nothing is written when it yields no value.
            '''
            ckan_type = self._get_ckan_type(dataset_dict)
            dct_type = helpers_odsh.map_ckan_type_to_dct_type(ckan_type)
            if dct_type:
                # Graph.set expects a complete (subject, predicate, object)
                # triple.
                self.g.set(
                    (dataset_ref, DCT.type, rdflib.URIRef(dct_type))
                )
    
    root's avatar
    root committed
        def _get_ckan_type(self, dataset_dict):
            ckan_type = self._get_dataset_value(dataset_dict, 'type')
            return ckan_type
    
    root's avatar
    root committed
        def _remove_predefined_collection_members(self):
            '''
            Remove every dct:hasVersion triple from the graph.

            Only the collection member links are dropped; all other triples stay
            in the graph. The wildcard form of ``Graph.remove`` is used so that
            the graph is not modified while it is being iterated.
            '''
            self.g.remove((None, DCT.hasVersion, None))
    
    root's avatar
    root committed
        def _add_collection_members(self, dataset_dict, dataset_ref):
            '''
            Link the collection to each of its datasets with dct:hasVersion.

            The member URIs are supplied by
            ``_get_dataset_refs_belonging_to_collection``.
            '''
            member_refs = self._get_dataset_refs_belonging_to_collection(dataset_dict)
            for member_ref in member_refs:
                member_triple = (dataset_ref, DCT.hasVersion, rdflib.URIRef(member_ref))
                self.g.add(member_triple)
    
    root's avatar
    root committed
        def _is_dataset_collection(self, dataset_dict):
            ckan_type = self._get_ckan_type(dataset_dict)
    
    root's avatar
    root committed
            return is_collection
    
    root's avatar
    root committed
        def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
            '''
            Return a list with one reference URL per dataset name that
            ``helpers_collection.get_dataset_names`` yields for the collection.
            '''
            return [
                self._construct_refs(dataset_name)
                for dataset_name in helpers_collection.get_dataset_names(dataset_dict)
            ]
    
    root's avatar
    root committed
        @staticmethod
        def _construct_refs(id):
            '''
            Return the reference URL of a dataset.

            The configured ``ckan.site_url`` is concatenated with the value
            that ``url_for('dataset.read', id=id)`` returns.
            '''
            return config.get('ckan.site_url') + helpers.url_for('dataset.read', id=id)
    
    root's avatar
    root committed
        def _dataset_belongs_to_collection(self, dataset_dict):
            '''
            returns True if a containing collection is found
            '''
    
            if dataset_dict.get('type') == 'collection':
    
    root's avatar
    root committed
                return False
    
            collection_name = helpers_collection.get_collection_id(dataset_dict)
    
    root's avatar
    root committed
    
        def _add_collection(self, dataset_dict, dataset_ref):
    
            collection_id = helpers_collection.get_collection_id(dataset_dict)
    
    root's avatar
    root committed
            collection_uri = self._construct_refs(collection_id)
            self.g.set(
    
    root's avatar
    root committed
                    rdflib.URIRef(collection_uri)