Skip to content
Snippets Groups Projects
odsh_dcat_de_profile.py 8.62 KiB
Newer Older
  • Learn to ignore specific revisions
  • root's avatar
    root committed
    import rdflib
    
    from ckan.common import config
    import ckan.lib.helpers as helpers
    import ckan.model as model
    from ckanext.dcat.profiles import DCT
    from ckanext.dcat.utils import resource_uri
    import ckanext.dcatde.dataset_utils as ds_utils
    
    Thorge Petersen's avatar
    Thorge Petersen committed
    from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0, DCATDE_1_0_1
    
    root's avatar
    root committed
    
    
    import ckanext.odsh.helpers as odsh_helpers
    
    root's avatar
    root committed
    import ckanext.odsh.helpers_tpsh as helpers_tpsh
    import ckanext.odsh.collection.helpers as helpers_collection
    
    
    # Dublin Core Terms and DCAT namespaces. Both names are also imported
    # above (DCT from ckanext.dcat.profiles, DCAT from ckanext.dcatde.profiles);
    # these later assignments are the bindings the rest of the module uses.
    DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
    DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
    
    Thorge Petersen's avatar
    Thorge Petersen committed
    # DCAT-AP.de 1.0.2 namespace; checked together with the DCATDE namespaces
    # imported from ckanext.dcatde.profiles when parsing distributions.
    DCATDE_1_0_2 = rdflib.namespace.Namespace("http://dcat-ap.de/def/dcatde/1.0.2/")
    
    root's avatar
    root committed
    
    
    class ODSHDCATdeProfile(DCATdeProfile):
    
        # from RDF
    
        def parse_dataset(self, dataset_dict, dataset_ref):
            '''
            Builds the ckan dataset dict from the RDF graph.

            The parent profile parses first; afterwards the ODSH specific
            steps run (license attribution of distributions, type and
            references). A dataset for which _belongs_to_collection is true
            gets the 'add_to_collection' flag.

            Returns the dataset dict produced by the parent profile, updated
            in place by the ODSH steps.
            '''
            dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
                dataset_dict, dataset_ref
            )

            # The order of the steps is the order in which they are applied.
            odsh_parse_steps = (
                self._parse_distributions,
                self._parse_type,
                self._parse_references,
            )
            for parse_step in odsh_parse_steps:
                parse_step(dataset_dict, dataset_ref)

            if self._belongs_to_collection(dataset_dict, dataset_ref):
                self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)

            return dataset_dict
    
    root's avatar
    root committed
        def _parse_distributions(self, dataset_dict, dataset_ref):
            '''
            Copies the first licenseAttributionByText value found on a
            distribution into the extras of the dataset dict.

            Only distributions whose URI matches a resource of the dataset
            dict are considered. Every known DCAT-AP.de namespace is tried;
            the search stops as soon as one value has been stored.
            '''
            known_namespaces = (DCATDE, DCATDE_1_0, DCATDE_1_0_1, DCATDE_1_0_2)

            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                distribution_uri = str(distribution)
                for resource_dict in dataset_dict.get('resources', []):
                    # Match distribution in graph and distribution in ckan-dict
                    if distribution_uri != resource_uri(resource_dict):
                        continue

                    for namespace in known_namespaces:
                        attribution = self._object_value(
                            distribution, namespace.licenseAttributionByText)
                        if not attribution:
                            continue
                        ds_utils.insert_new_extras_field(
                            dataset_dict, 'licenseAttributionByText', attribution)
                        return
    
    root's avatar
    root committed
        def _parse_type(self, dataset_dict, dataset_ref):
            '''
            Sets the ckan type of the dataset dict from the dct:type of the
            dataset. The dict is left untouched if the graph has no dct:type.
            '''
            dct_type = self._object(dataset_ref, DCT.type)
            if not dct_type:
                return
            dataset_dict['type'] = helpers_tpsh.map_dct_type_to_ckan_type(
                str(dct_type))
    
        def _parse_references(self, dataset_dict, dataset_ref):
            '''
            Stores the dct:references object of the dataset in the extra
            field 'reference'. Nothing is stored if there is no such object.
            '''
            reference = self._object(dataset_ref, DCT.references)
            if not reference:
                return
            ds_utils.insert_new_extras_field(dataset_dict, 'reference', reference)
    
    
    root's avatar
    root committed
        def _belongs_to_collection(self, dataset_dict, dataset_ref):
            '''
            Returns True if the dataset has a dct:isVersionOf statement in
            the graph, False otherwise. dataset_dict is not read.
            '''
            return bool(self._object(dataset_ref, DCT.isVersionOf))
    
    root's avatar
    root committed
        def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
            dataset_dict.update({'add_to_collection': True})
    
    
    root's avatar
    root committed
        def graph_from_dataset(self, dataset_dict, dataset_ref):
            '''
            Extends the RDF export of the parent profile.

            Once the parent profile has filled the graph, the ODSH specific
            statements are added: contributor id, license attribution text,
            type, modified/issued of the distributions and references.
            For a collection the predefined members are removed and the
            members are added again; a dataset that belongs to a collection
            is handed to _add_collection.
            '''
            super(ODSHDCATdeProfile, self).graph_from_dataset(
                dataset_dict, dataset_ref)

            # The order of the steps is the order in which they are applied.
            odsh_export_steps = (
                self._add_contributor_id,
                self._add_license_attribution_by_text,
                self._add_type,
                self._add_modified_and_issued,
                self._add_references,
            )
            for export_step in odsh_export_steps:
                export_step(dataset_dict, dataset_ref)

            if self._is_dataset_collection(dataset_dict):
                self._remove_predefined_collection_members()
                self._add_collection_members(dataset_dict, dataset_ref)

            if self._dataset_belongs_to_collection(dataset_dict):
                self._add_collection(dataset_dict, dataset_ref)
    
    root's avatar
    root committed
        def _add_contributor_id(self, dataset_dict, dataset_ref):
            '''
            Adds the Schleswig-Holstein contributor id to the dataset as
            dcatde:contributorID.

            dataset_dict is not read.
            '''
            contributor_id = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
            # Graph.add needs a complete (subject, predicate, object) triple.
            self.g.add(
                (dataset_ref, DCATDE.contributorID,
                 rdflib.URIRef(contributor_id))
            )
    
    root's avatar
    root committed
        def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):
            '''
            Writes the licenseAttributionByText value of the dataset dict to
            the dataset and to each of its distributions as
            dcatde:licenseAttributionByText.

            Nothing is written if the dataset dict has no such value.
            '''
            attribution_text = self._get_dataset_value(
                dataset_dict, 'licenseAttributionByText')
            if not attribution_text:
                return

            predicate = DCATDE.licenseAttributionByText
            self.g.set(
                (dataset_ref, predicate, rdflib.Literal(attribution_text)))
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                self.g.set(
                    (distribution, predicate, rdflib.Literal(attribution_text)))
    
        def _add_references(self, dataset_dict, dataset_ref):
            '''
            Adds the extra field 'reference'
            (Musterdatenkatalog/Musterdatensatz) to the dataset as
            dct:references. Nothing is written if the extra field is empty.
            '''
            extras = dataset_dict.get('extras')
            reference_uri = odsh_helpers.odsh_extract_value_from_extras(
                extras, 'reference')
            if not reference_uri:
                return
            self.g.set(
                (dataset_ref, DCT.references, rdflib.URIRef(reference_uri)))
        
    
        def _add_modified_and_issued(self, dataset_dict, dataset_ref):
            '''
            Writes the last_modified and created values of the resources to
            dct:modified and dct:issued of the matching distributions.

            Values are written as literals of type xsd:dateTime; empty
            values are skipped.
            '''
            xsd_date_time = "http://www.w3.org/2001/XMLSchema#dateTime"
            # resource key -> predicate on the distribution, in write order
            date_fields = (
                ('last_modified', DCT.modified),
                ('created', DCT.issued),
            )

            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                distribution_uri = str(distribution)
                for resource_dict in dataset_dict.get('resources', []):
                    # Match distribution in graph and distribution in ckan-dict
                    if distribution_uri != resource_uri(resource_dict):
                        continue

                    for resource_key, predicate in date_fields:
                        timestamp = resource_dict.get(resource_key, None)
                        if timestamp:
                            self.g.set(
                                (distribution, predicate, rdflib.Literal(
                                    timestamp, datatype=xsd_date_time))
                            )
    
    
    root's avatar
    root committed
        def _add_type(self, dataset_dict, dataset_ref):
            '''
            Adds the type if there is a known mapping from ckan type to
            dct:type.

            The ckan type of the dataset dict is mapped with
            helpers_tpsh.map_ckan_type_to_dct_type; a falsy mapping result
            leaves the graph unchanged.
            '''
            ckan_type = self._get_ckan_type(dataset_dict)
            dct_type = helpers_tpsh.map_ckan_type_to_dct_type(ckan_type)
            if dct_type:
                # Graph.set needs a complete (subject, predicate, object)
                # triple; it replaces any dct:type already on the dataset.
                self.g.set(
                    (dataset_ref, DCT.type, rdflib.URIRef(dct_type))
                )
    
    root's avatar
    root committed
        def _get_ckan_type(self, dataset_dict):
            '''
            Returns the value stored under 'type' for the dataset, looked up
            with _get_dataset_value.
            '''
            return self._get_dataset_value(dataset_dict, 'type')
    
    root's avatar
    root committed
        def _remove_predefined_collection_members(self):
            '''
            Removes every dct:hasVersion statement from the graph.

            graph_from_dataset calls this right before the collection
            members are added again by _add_collection_members.
            '''
            # None is a wildcard in Graph.remove: one call drops all matching
            # triples and the graph is not modified while it is iterated.
            self.g.remove((None, DCT.hasVersion, None))
    
    root's avatar
    root committed
        def _add_collection_members(self, dataset_dict, dataset_ref):
            '''
            Adds one dct:hasVersion statement to the dataset for every
            reference returned by _get_dataset_refs_belonging_to_collection.
            '''
            member_refs = self._get_dataset_refs_belonging_to_collection(
                dataset_dict)
            for member_ref in member_refs:
                member_triple = (
                    dataset_ref, DCT.hasVersion, rdflib.URIRef(member_ref))
                self.g.add(member_triple)
    
    root's avatar
    root committed
        def _is_dataset_collection(self, dataset_dict):
            '''
            Returns True if the ckan type of the dataset is 'collection',
            False otherwise.
            '''
            ckan_type = self._get_ckan_type(dataset_dict)
            return ckan_type == 'collection'
    
    root's avatar
    root committed
        def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
            '''
            Returns a list with one reference, built by _construct_refs, for
            every dataset name that helpers_collection.get_dataset_names
            yields for the dataset dict.
            '''
            member_names = helpers_collection.get_dataset_names(dataset_dict)
            return list(map(self._construct_refs, member_names))
    
    root's avatar
    root committed
        @staticmethod
        def _construct_refs(id):
            '''
            Returns the configured 'ckan.site_url' followed by the url of
            the 'dataset.read' route for the given id.
            '''
            site_url = config.get('ckan.site_url')
            dataset_path = helpers.url_for('dataset.read', id=id)
            return site_url + dataset_path
    
    root's avatar
    root committed
        def _dataset_belongs_to_collection(self, dataset_dict):
            '''
            returns True if a containing collection is found
            '''
    
            if dataset_dict.get('type') == 'collection':
    
    root's avatar
    root committed
                return False
    
            collection_name = helpers_collection.get_collection_id(dataset_dict)
    
    root's avatar
    root committed
    
        def _add_collection(self, dataset_dict, dataset_ref):
    
            collection_id = helpers_collection.get_collection_id(dataset_dict)
    
    root's avatar
    root committed
            collection_uri = self._construct_refs(collection_id)
            self.g.set(
    
    root's avatar
    root committed
                    rdflib.URIRef(collection_uri)