Skip to content
Snippets Groups Projects
odsh_dcat_de_profile.py 6.82 KiB
Newer Older
  • Learn to ignore specific revisions
  • root's avatar
    root committed
    import rdflib
    
    from ckan.common import config
    import ckan.lib.helpers as helpers
    import ckan.model as model
    from ckanext.dcat.profiles import DCT
    from ckanext.dcat.utils import resource_uri
    import ckanext.dcatde.dataset_utils as ds_utils
    from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0
    
    import ckanext.odsh.helpers_tpsh as helpers_tpsh
    import ckanext.odsh.collection.helpers as helpers_collection
    from ckanext.odsh.helper_pkg_dict import HelperPgkDict
    
    
    # NOTE: these assignments rebind the DCT and DCAT names imported above
    # (from ckanext.dcat.profiles and ckanext.dcatde.profiles), so the rest of
    # this module uses the namespaces defined here.
    # Dublin Core terms namespace (dct:type, dct:hasVersion, dct:isVersionOf).
    DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
    # W3C DCAT namespace (dcat:distribution).
    DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
    
    
    class ODSHDCATdeProfile(DCATdeProfile):
        '''
        DCAT-AP.de profile extended with ODSH specific handling.

        Parsing (RDF -> CKAN dict) additionally reads
        ``licenseAttributionByText`` from the distributions, the ``dct:type``
        of the dataset and whether the dataset references a collection via
        ``dct:isVersionOf``.

        Serialising (CKAN dict -> RDF) additionally writes the contributor id,
        ``licenseAttributionByText``, ``dct:type`` and the collection
        relations ``dct:hasVersion`` / ``dct:isVersionOf``.

        The helpers ``_object``, ``_object_value`` and ``_get_dataset_value``
        as well as the graph ``self.g`` are not defined in this class; they
        are expected to be provided by the base profile.
        '''

        # from RDF

        def parse_dataset(self, dataset_dict, dataset_ref):
            '''
            Fill ``dataset_dict`` from the graph node ``dataset_ref``.

            Runs the parent implementation first and then applies the ODSH
            specific parsing steps. Returns the dict returned by the parent
            implementation after it has been updated in place.
            '''
            dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
                dataset_dict, dataset_ref
            )
            self._parse_distributions(dataset_dict, dataset_ref)
            self._parse_type(dataset_dict, dataset_ref)
            if self._belongs_to_collection(dataset_dict, dataset_ref):
                self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)
            return dataset_dict

        def _parse_distributions(self, dataset_dict, dataset_ref):
            '''
            Copy the first ``licenseAttributionByText`` found on a matching
            distribution into the extras of ``dataset_dict``.

            A distribution matches when its URI equals the URI computed by
            ``resource_uri`` for one of the resources in ``dataset_dict``.
            Both the DCATDE and the DCATDE_1_0 namespace are tried, in that
            order.
            '''
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                for resource_dict in dataset_dict.get('resources', []):
                    # Match distribution in graph and distribution in ckan-dict
                    if unicode(distribution) == resource_uri(resource_dict):
                        for namespace in [DCATDE, DCATDE_1_0]:
                            value = self._object_value(
                                distribution, namespace.licenseAttributionByText)
                            if value:
                                ds_utils.insert_new_extras_field(
                                    dataset_dict, 'licenseAttributionByText', value)
                                # the value is stored once on dataset level,
                                # so the first hit ends the whole search
                                return

        def _parse_type(self, dataset_dict, dataset_ref):
            '''
            Set ``dataset_dict['type']`` from the ``dct:type`` of the dataset.

            Nothing is changed if the graph holds no ``dct:type`` for
            ``dataset_ref``.
            '''
            dct_type = self._object(dataset_ref, DCT.type)
            if dct_type:
                ckan_type = helpers_tpsh.map_dct_type_to_ckan_type(str(dct_type))
                dataset_dict.update({'type': ckan_type})

        def _belongs_to_collection(self, dataset_dict, dataset_ref):
            '''
            returns True if the dataset has a ``dct:isVersionOf`` object in
            the graph, False otherwise
            '''
            dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf)
            return bool(dct_is_version_of)

        def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
            '''
            sets the flag ``add_to_collection`` in ``dataset_dict``
            '''
            dataset_dict.update({'add_to_collection': True})

        # to RDF

        def graph_from_dataset(self, dataset_dict, dataset_ref):
            '''
            Serialise ``dataset_dict`` into the graph.

            Runs the parent (DCATdeProfile) implementation first and then
            adds the ODSH specific information to the rdf export:
            contributor id, licenseAttributionByText, dct:type and the
            collection relations.
            '''
            super(ODSHDCATdeProfile, self).graph_from_dataset(
                dataset_dict, dataset_ref)
            self._add_contributor_id(dataset_dict, dataset_ref)
            self._add_license_attribution_by_text(dataset_dict, dataset_ref)
            self._add_type(dataset_dict, dataset_ref)
            if self._is_dataset_collection(dataset_dict):
                # replace any dct:hasVersion triples already in the graph by
                # the members looked up for this collection
                self._remove_predefined_collection_members()
                self._add_collection_members(dataset_dict, dataset_ref)
            if self._dataset_belongs_to_collection(dataset_dict):
                self._add_collection(dataset_dict, dataset_ref)

        def _add_contributor_id(self, dataset_dict, dataset_ref):
            '''
            adds the fixed contributor id of Schleswig-Holstein to the dataset
            '''
            contributor_uri = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
            self.g.add(
                (dataset_ref, DCATDE.contributorID, rdflib.URIRef(contributor_uri))
            )

        def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):
            '''
            writes licenseAttributionByText to the dataset and to each of its
            distributions, if the dataset has such a value
            '''
            attribution_text = self._get_dataset_value(
                dataset_dict, 'licenseAttributionByText')
            if not attribution_text:
                return
            self.g.set(
                (dataset_ref, DCATDE.licenseAttributionByText,
                    rdflib.Literal(attribution_text))
            )
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                self.g.set(
                    (distribution, DCATDE.licenseAttributionByText,
                        rdflib.Literal(attribution_text))
                )

        def _add_type(self, dataset_dict, dataset_ref):
            '''
            adds the type if there is a known mapping from ckan type to
            dct:type
            '''
            ckan_type = self._get_ckan_type(dataset_dict)
            dct_type = helpers_tpsh.map_ckan_type_to_dct_type(ckan_type)
            if dct_type:
                self.g.set(
                    (dataset_ref, DCT.type, rdflib.URIRef(dct_type))
                )

        def _get_ckan_type(self, dataset_dict):
            '''
            returns the value stored under ``type`` for the dataset
            '''
            return self._get_dataset_value(dataset_dict, 'type')

        def _remove_predefined_collection_members(self):
            '''
            removes every dct:hasVersion triple from the graph
            '''
            # None acts as a wildcard in the triple pattern, so one call
            # removes all matching triples. This avoids modifying the graph
            # while iterating over it.
            self.g.remove((None, DCT.hasVersion, None))

        def _add_collection_members(self, dataset_dict, dataset_ref):
            '''
            adds one dct:hasVersion triple per dataset belonging to the
            collection described by ``dataset_dict``
            '''
            member_refs = self._get_dataset_refs_belonging_to_collection(dataset_dict)
            for member_ref in member_refs:
                self.g.add(
                    (dataset_ref, DCT.hasVersion, rdflib.URIRef(member_ref))
                )

        def _is_dataset_collection(self, dataset_dict):
            '''
            returns True if the ckan type of the dataset is 'collection'
            '''
            return self._get_ckan_type(dataset_dict) == 'collection'

        def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
            '''
            returns the list of URLs of all datasets belonging to the
            collection whose id is ``dataset_dict['id']``

            Names for which ``model.Package.get`` finds no package are
            skipped.
            '''
            dataset_names = helpers_collection.get_all_datasets_belonging_to_collection(
                collection_name=dataset_dict.get('id')
            )
            dataset_refs = []
            for name in dataset_names:
                package = model.Package.get(name)
                if package is None:
                    # guard against names that can not be resolved to a
                    # package instead of failing on None.as_dict()
                    continue
                package_id = package.as_dict().get('id')
                dataset_refs.append(self._construct_refs(package_id))
            return dataset_refs

        @staticmethod
        def _construct_refs(id):
            '''
            returns the URL of the package read page for ``id``, built from
            the configured ``ckan.site_url`` and the route of the package
            controller
            '''
            public_url = config.get('ckan.site_url')
            url_to_id = helpers.url_for(controller='package', action='read', id=id)
            return public_url + url_to_id

        def _dataset_belongs_to_collection(self, dataset_dict):
            '''
            returns True if a containing collection is found
            '''
            # a collection itself is never treated as member of a collection
            if dataset_dict.get('type') == 'collection':
                return False
            id_dataset = dataset_dict.get('id')
            collection_name = helpers_collection.get_collection_name_by_dataset(id_dataset)
            return collection_name is not None

        def _add_collection(self, dataset_dict, dataset_ref):
            '''
            sets dct:isVersionOf of the dataset to the URL of its collection
            '''
            collection_id = HelperPgkDict(dataset_dict).get_collection_id()
            collection_uri = self._construct_refs(collection_id)
            self.g.set(
                (dataset_ref, DCT.isVersionOf, rdflib.URIRef(collection_uri))
            )