Skip to content
Snippets Groups Projects
Commit 6d51b7e4 authored by anonymous's avatar anonymous
Browse files

fix license export

parent b791b6fd
Branches
Tags
No related merge requests found
......@@ -20,6 +20,7 @@ log = logging.getLogger(__name__)
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
def _license(self, dataset_ref):
......@@ -48,17 +49,20 @@ class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
return ''
def _distribution_format(self, distribution, normalize_ckan_format=True):
    '''Return ``(imt, label)`` for a distribution, translating the label.

    Delegates to the parent profile's ``_distribution_format`` and then
    replaces ``label`` with its entry in ``resource_formats_import()``
    when such an entry exists; otherwise the label is returned unchanged.

    :param distribution: distribution node, passed through to the parent
    :param normalize_ckan_format: passed through to the parent
    :returns: tuple ``(imt, label)``
    '''
    # Call the parent exactly once (the pre-change and post-change call
    # were both present in this block).
    imt, label = super(ODSHEuropeanDCATAPProfile, self)._distribution_format(
        distribution, normalize_ckan_format)
    # Fetch the mapping once instead of once for the membership test and
    # once more for the lookup.
    import_formats = resource_formats_import()
    return imt, import_formats.get(label, label)
def graph_from_dataset(self, dataset_dict, dataset_ref):
super(ODSHEuropeanDCATAPProfile,self).graph_from_dataset(dataset_dict, dataset_ref)
super(ODSHEuropeanDCATAPProfile, self).graph_from_dataset(
dataset_dict, dataset_ref)
for s, p, o in self.g.triples((None, rdflib.RDF.type, DCAT.Distribution)):
for s2, p2, o2 in self.g.triples((s, DCT['format'], None)):
if o2.decode() in resource_formats_export():
self.g.set((s, DCT['format'], rdflib.URIRef(resource_formats_export()[o2.decode()])))
self.g.set((s, DCT['format'], rdflib.URIRef(
resource_formats_export()[o2.decode()])))
for s, p, o in self.g.triples((None, DCT.language, None)):
if o.decode() in get_language():
self.g.set((s, p, rdflib.URIRef(get_language()[o.decode()])))
......@@ -74,29 +78,49 @@ class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
class ODSHDCATdeProfile(DCATdeProfile):
    '''DCATdeProfile subclass that carries ``licenseAttributionByText``
    between the RDF graph and the CKAN dataset dict, and tags exported
    datasets with the Schleswig-Holstein contributor ID.

    NOTE(review): statement nesting was reconstructed from a diff view
    with indentation stripped -- confirm against the repository.
    '''

    def parse_dataset(self, dataset_dict, dataset_ref):
        '''Parse the dataset via the parent profile, then copy the first
        ``licenseAttributionByText`` found on a matching distribution
        into the dataset extras.

        :param dataset_dict: CKAN dataset dict being filled
        :param dataset_ref: graph node of the dataset
        :returns: the (possibly extended) ``dataset_dict``
        '''
        dataset_dict = super(ODSHDCATdeProfile, self).parse_dataset(
            dataset_dict, dataset_ref)
        # Enhance Distributions
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            for resource_dict in dataset_dict.get('resources', []):
                # Match distribution in graph and distribution in ckan-dict
                if unicode(distribution) == resource_uri(resource_dict):
                    # Both namespace versions are probed, in this order.
                    for namespace in [DCATDE, DCATDE_1_0]:
                        value = self._object_value(
                            distribution, namespace.licenseAttributionByText)
                        if value:
                            ds_utils.insert_new_extras_field(
                                dataset_dict, 'licenseAttributionByText', value)
                            # First hit wins; stop scanning.
                            return dataset_dict
        return dataset_dict

    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''Serialize the dataset via the parent profile, then add the
        contributor ID and, if present in the extras, the
        ``licenseAttributionByText`` on the dataset and on each of its
        distributions.

        :param dataset_dict: CKAN dataset dict being exported
        :param dataset_ref: graph node of the dataset
        '''
        super(ODSHDCATdeProfile, self).graph_from_dataset(
            dataset_dict, dataset_ref)
        # <dcatde:contributorID rdf:resource="http://dcat-ap.de/def/contributors/schleswigHolstein"/>
        self.g.add((dataset_ref, DCATDE.contributorID, rdflib.URIRef(
            "http://dcat-ap.de/def/contributors/schleswigHolstein")))

        # Value of the first extras entry keyed 'licenseAttributionByText'.
        extras = dataset_dict.get('extras') or []
        attr = next(
            (entry['value'] for entry in extras
             if entry['key'] == 'licenseAttributionByText'),
            None)
        if not attr:
            return

        attribution = rdflib.Literal(attr)
        self.g.set(
            (dataset_ref, DCATDE.licenseAttributionByText, attribution))
        # Snapshot the distributions before writing: Graph.set mutates the
        # graph that the lazy objects() iterator is reading from.
        for dist in list(self.g.objects(dataset_ref, DCAT.distribution)):
            self.g.set(
                (dist, DCATDE.licenseAttributionByText, attribution))
_RESOURCE_FORMATS_IMPORT = None
_RESOURCE_FORMATS_EXPORT = None
def resource_formats():
global _RESOURCE_FORMATS_IMPORT
global _RESOURCE_FORMATS_EXPORT
......@@ -105,7 +129,8 @@ def resource_formats():
g = rdflib.Graph()
# Something went wrong with trying to get the file formats online, try to use backup instead
try:
fallback_filepath = config.get('ckan.odsh.resource_formats_fallback_filepath')
fallback_filepath = config.get(
'ckan.odsh.resource_formats_fallback_filepath')
g.parse(fallback_filepath)
assert len(set([s for s in g.subjects()])) > 120
except:
......@@ -118,12 +143,14 @@ def resource_formats():
_RESOURCE_FORMATS_EXPORT[elem.split('/')[-1]] = elem
_RESOURCE_FORMATS_IMPORT[elem] = elem.split('/')[-1]
def resource_formats_export():
    '''Return the module-level ``_RESOURCE_FORMATS_EXPORT`` mapping.

    If the mapping is still falsy (unset or empty), ``resource_formats()``
    is called first to populate it.
    '''
    if _RESOURCE_FORMATS_EXPORT:
        return _RESOURCE_FORMATS_EXPORT
    resource_formats()
    # Re-read the global: resource_formats() rebinds/fills it.
    return _RESOURCE_FORMATS_EXPORT
def resource_formats_import():
global _RESOURCE_FORMATS_IMPORT
if not _RESOURCE_FORMATS_IMPORT:
......@@ -133,6 +160,7 @@ def resource_formats_import():
_LANGUAGES = None
def get_language():
''' When datasets are exported in rdf-format, their language-tag
should be given as
......@@ -149,7 +177,8 @@ def get_language():
_LANGUAGES = {}
languages_file_path = config.get('ckanext.odsh.language.mapping')
if not languages_file_path:
log.warning("Could not find config setting: 'ckanext.odsh.language.mapping', using fallback instead.")
log.warning(
"Could not find config setting: 'ckanext.odsh.language.mapping', using fallback instead.")
languages_file_path = '/usr/lib/ckan/default/src/ckanext-odsh/languages.json'
with open(languages_file_path) as languages_file:
try:
......
......@@ -17,12 +17,15 @@ hostPort = 5002
# TODO: find a better way to set data on RequestHandler
data = ""
class RequestHandler(BaseHTTPRequestHandler):
# GET
def do_GET(self):
    """Answer a GET request with status OK and the module-level ``data``.

    The body is ``data`` encoded as UTF-8 and is declared as
    ``application/rdf+xml``.
    """
    self.send_response(requests.codes.ok)
    # Send exactly one Content-Type header; the former JSON header was
    # still being sent alongside the RDF/XML one.
    self.send_header(
        'Content-Type', 'application/rdf+xml; charset=utf-8')
    self.end_headers()
    self.wfile.write(data.encode("utf-8"))
......@@ -46,7 +49,8 @@ class HarvestServerMock(threading.Thread):
self._stop_event = threading.Event()
self.thread_name = self.__class__
self.server = HTTPServer((hostName, hostPort), RequestHandler)
threading.Thread.__init__(self, name=self.thread_name, target=self.server.serve_forever)
threading.Thread.__init__(
self, name=self.thread_name, target=self.server.serve_forever)
self.setDaemon(True)
......@@ -60,6 +64,7 @@ class HarvestServerMock(threading.Thread):
# except KeyboardInterrupt:
# pass
def close(self):
self.server.server_close()
# print(time.asctime(), "Server Stops - %s:%s" % (hostName, hostPort))
......@@ -9,6 +9,8 @@ import urllib2
import ckan.tests.helpers as helpers
from ckan.common import config
import ckan.config.middleware
from ckanext.dcatde.profiles import DCATDE, DCAT, DCATDE_1_0
import pdb
# run with nosetests --ckan --nologcapture --with-pylons=<config to test> ckanext/odsh/tests/test_routes.py
......@@ -44,9 +46,10 @@ class TestRDFExport:
issued='27-01-2000',
extras=extras,
owner_org='test',
license_id="http://dcat-ap.de/def/licenses/dl-by-de/2.0")
license_id="http://dcat-ap.de/def/licenses/dl-by-de/2.0",
licenseAttributionByText='foo')
factories.Resource(
package_id=dataset['id'], license=dataset['license_id'])
package_id=dataset['id'], license=dataset['license_id'], licenseAttributionByText='foo')
factories.Resource(
package_id=dataset['id'])
......@@ -54,9 +57,25 @@ class TestRDFExport:
response = self._get_app().get('/dataset/'+dataset['name']+'.rdf')
g.parse(data=response.body)
lic = self._extract_licenses(g)
att = self._extract_licenseAttributions(g)
assert len(lic) == 3
assert len(att) == 3
assert len(set([str(l) for l in lic])) == 1
assert len(set([str(a) for a in att])) == 1
assert str(att[0]) == 'text'
def test_catalog(self):
    """The number of dcat:Dataset subjects in ``/catalog.xml`` equals the
    ``count`` reported by the ``package_search`` API action."""
    catalog_graph = rdflib.Graph()
    catalog_body = self._get_app().get('/catalog.xml').body
    catalog_graph.parse(data=catalog_body)
    exported = list(catalog_graph.subjects(RDF.type, DCAT.Dataset))

    search_body = self._get_app().get('/api/3/action/package_search').body
    search_result = json.loads(search_body)
    assert len(exported) == search_result['result']['count']
def _get_app(self):
if not hasattr(self, 'app'):
......@@ -78,3 +97,20 @@ class TestRDFExport:
ret.append(l)
return ret
def _extract_licenseAttributions(self, g):
    """Collect every ``licenseAttributionByText`` object in graph ``g``.

    Asserts that ``g`` holds exactly one dcat:Dataset. Values attached to
    the dataset come first, followed by those of each of its
    distributions; for every node both DCATDE and DCATDE_1_0 are queried,
    in that order.
    """
    found = list(g.subjects(RDF.type, DCAT.Dataset))
    assert len(found) == 1
    root = found[0]
    # Dataset node first, then its distributions, as one lookup sequence.
    nodes = [root] + list(g.objects(root, DCAT.distribution))
    return [
        attribution
        for node in nodes
        for namespace in (DCATDE, DCATDE_1_0)
        for attribution in g.objects(node, namespace.licenseAttributionByText)
    ]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment