Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import rdflib
from ckan.common import config
import ckan.lib.helpers as helpers
import ckan.model as model
from ckanext.dcat.profiles import DCT
from ckanext.dcat.utils import resource_uri
import ckanext.dcatde.dataset_utils as ds_utils
from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, DCATDE_1_0
import ckanext.odsh.helpers_tpsh as helpers_tpsh
import ckanext.odsh.collection.helpers as helpers_collection
from ckanext.odsh.helper_pkg_dict import HelperPgkDict
# Namespaces used by this profile. These rebind the DCT and DCAT names that
# are imported above, so the rest of the module uses the definitions below.
DCT = rdflib.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.Namespace("http://www.w3.org/ns/dcat#")
class ODSHDCATdeProfile(DCATdeProfile):
    '''
    DCATdeProfile extended with ODSH specific handling.

    When parsing RDF it additionally reads licenseAttributionByText from
    the distributions, maps dct:type to the ckan type and marks datasets
    carrying dct:isVersionOf for being added to a collection.
    When serializing to RDF it additionally writes the contributorID,
    licenseAttributionByText, dct:type and the collection relations
    (dct:hasVersion / dct:isVersionOf).
    '''

    # from RDF

    def parse_dataset(self, dataset_dict, dataset_ref):
        '''
        runs the parent parser first and then the ODSH specific parsing
        steps; returns the dataset_dict produced by the parent parser
        after it has been updated by those steps
        '''
        dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
            dataset_dict, dataset_ref
        )
        self._parse_distributions(dataset_dict, dataset_ref)
        self._parse_type(dataset_dict, dataset_ref)
        if self._belongs_to_collection(dataset_dict, dataset_ref):
            self._mark_for_adding_to_ckan_collection(dataset_dict, dataset_ref)
        return dataset_dict

    def _parse_distributions(self, dataset_dict, dataset_ref):
        '''
        looks for licenseAttributionByText on the distributions of the
        dataset and passes the first value found to
        ds_utils.insert_new_extras_field; only distributions whose URI
        matches a resource in dataset_dict are considered
        '''
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            for resource_dict in dataset_dict.get('resources', []):
                # Match distribution in graph and distribution in ckan-dict
                if unicode(distribution) == resource_uri(resource_dict):
                    # try the current namespace first, then the 1.0 one
                    for namespace in [DCATDE, DCATDE_1_0]:
                        value = self._object_value(
                            distribution, namespace.licenseAttributionByText)
                        if value:
                            ds_utils.insert_new_extras_field(
                                dataset_dict, 'licenseAttributionByText', value)
                            # the first value found wins
                            return

    def _parse_type(self, dataset_dict, dataset_ref):
        '''
        sets dataset_dict['type'] to the ckan type mapped from dct:type,
        if the dataset has a dct:type
        '''
        dct_type = self._object(dataset_ref, DCT.type)
        if dct_type:
            ckan_type = helpers_tpsh.map_dct_type_to_ckan_type(str(dct_type))
            dataset_dict.update({'type': ckan_type})

    def _belongs_to_collection(self, dataset_dict, dataset_ref):
        '''
        returns True if the dataset has a (truthy) dct:isVersionOf object
        '''
        dct_is_version_of = self._object(dataset_ref, DCT.isVersionOf)
        return bool(dct_is_version_of)

    def _mark_for_adding_to_ckan_collection(self, dataset_dict, dataset_ref):
        '''
        sets the flag 'add_to_collection' in dataset_dict
        '''
        dataset_dict.update({'add_to_collection': True})

    # to RDF

    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        this class inherits from DCATdeProfile
        it has been extended to add information to
        the rdf export
        '''
        super(ODSHDCATdeProfile, self).graph_from_dataset(
            dataset_dict, dataset_ref)
        self._add_contributor_id(dataset_dict, dataset_ref)
        self._add_license_attribution_by_text(dataset_dict, dataset_ref)
        self._add_type(dataset_dict, dataset_ref)
        if self._is_dataset_collection(dataset_dict):
            self._remove_predefined_collection_members()
            self._add_collection_members(dataset_dict, dataset_ref)
        if self._dataset_belongs_to_collection(dataset_dict):
            self._add_collection(dataset_dict, dataset_ref)

    def _add_contributor_id(self, dataset_dict, dataset_ref):
        '''
        adds the fixed contributorID of Schleswig-Holstein to the dataset
        '''
        contributorID = 'http://dcat-ap.de/def/contributors/schleswigHolstein'
        self.g.add(
            (dataset_ref, DCATDE.contributorID, rdflib.URIRef(contributorID))
        )

    def _add_license_attribution_by_text(self, dataset_dict, dataset_ref):
        '''
        sets licenseAttributionByText on the dataset and on each of its
        distributions if the dataset has a value for it
        '''
        licenseAttributionByText = self._get_dataset_value(
            dataset_dict, 'licenseAttributionByText')
        if not licenseAttributionByText:
            return
        attribution = rdflib.Literal(licenseAttributionByText)
        self.g.set(
            (dataset_ref, DCATDE.licenseAttributionByText, attribution)
        )
        # Materialise the distributions before writing: Graph.objects is
        # lazy and the graph must not be changed while it is being read.
        distributions = list(self.g.objects(dataset_ref, DCAT.distribution))
        for distribution in distributions:
            self.g.set(
                (distribution, DCATDE.licenseAttributionByText, attribution)
            )

    def _add_type(self, dataset_dict, dataset_ref):
        '''
        adds the type if there is a known mapping from ckan type to
        dct:type
        '''
        ckan_type = self._get_ckan_type(dataset_dict)
        dct_type = helpers_tpsh.map_ckan_type_to_dct_type(ckan_type)
        if dct_type:
            self.g.set(
                (dataset_ref, DCT.type, rdflib.URIRef(dct_type))
            )

    def _get_ckan_type(self, dataset_dict):
        '''
        returns the value of 'type' as found by _get_dataset_value
        '''
        return self._get_dataset_value(dataset_dict, 'type')

    def _remove_predefined_collection_members(self):
        '''
        removes every dct:hasVersion triple from the graph
        '''
        # A wildcard pattern removes all matching triples in one call and
        # avoids changing the graph while iterating over it.
        self.g.remove((None, DCT.hasVersion, None))

    def _add_collection_members(self, dataset_dict, dataset_ref):
        '''
        adds one dct:hasVersion triple per dataset belonging to the
        collection
        '''
        member_refs = self._get_dataset_refs_belonging_to_collection(
            dataset_dict)
        for ref in member_refs:
            self.g.add(
                (dataset_ref, DCT.hasVersion, rdflib.URIRef(ref))
            )

    def _is_dataset_collection(self, dataset_dict):
        '''
        returns True if the ckan type of the dataset is 'collection'
        '''
        return self._get_ckan_type(dataset_dict) == 'collection'

    def _get_dataset_refs_belonging_to_collection(self, dataset_dict):
        '''
        returns the list of URLs of all datasets belonging to the
        collection given by dataset_dict; datasets that cannot be loaded
        are left out
        '''
        dataset_names = helpers_collection.get_all_datasets_belonging_to_collection(
            collection_name=dataset_dict.get('id')
        )
        dataset_refs = []
        for name in dataset_names:
            package = model.Package.get(name)
            if package is None:
                # A member that cannot be found must not break the export
                # of the whole collection.
                continue
            dataset_refs.append(self._construct_refs(package.id))
        return dataset_refs

    @staticmethod
    def _construct_refs(id):
        '''
        returns the value of ckan.site_url followed by the url of the
        package read page for the given id
        '''
        public_url = config.get('ckan.site_url')
        url_to_id = helpers.url_for(controller='package', action='read', id=id)
        return public_url + url_to_id

    def _dataset_belongs_to_collection(self, dataset_dict):
        '''
        returns True if a containing collection is found
        '''
        # a collection is never treated as a member of a collection
        if self._is_dataset_collection(dataset_dict):
            return False
        id_dataset = dataset_dict.get('id')
        collection_name = helpers_collection.get_collection_name_by_dataset(
            id_dataset)
        return collection_name is not None

    def _add_collection(self, dataset_dict, dataset_ref):
        '''
        sets dct:isVersionOf of the dataset to the URL of its collection
        '''
        collection_id = HelperPgkDict(dataset_dict).get_collection_id()
        collection_uri = self._construct_refs(collection_id)
        self.g.set(
            (dataset_ref, DCT.isVersionOf, rdflib.URIRef(collection_uri))
        )