Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ckanext-odsh
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Open-Data
ckanext-odsh
Commits
6d51b7e4
Commit
6d51b7e4
authored
6 years ago
by
anonymous
Browse files
Options
Downloads
Patches
Plain Diff
fix license export
parent
b791b6fd
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
ckanext/odsh/profiles.py
+56
-27
56 additions, 27 deletions
ckanext/odsh/profiles.py
ckanext/odsh/tests/harvest_sever_mock.py
+7
-2
7 additions, 2 deletions
ckanext/odsh/tests/harvest_sever_mock.py
ckanext/odsh/tests/test_rdfexport.py
+38
-2
38 additions, 2 deletions
ckanext/odsh/tests/test_rdfexport.py
with
101 additions
and
31 deletions
ckanext/odsh/profiles.py
+
56
−
27
View file @
6d51b7e4
...
...
@@ -20,6 +20,7 @@ log = logging.getLogger(__name__)
DCT
=
rdflib
.
namespace
.
Namespace
(
"
http://purl.org/dc/terms/
"
)
DCAT
=
rdflib
.
namespace
.
Namespace
(
"
http://www.w3.org/ns/dcat#
"
)
class
ODSHEuropeanDCATAPProfile
(
EuropeanDCATAPProfile
):
def
_license
(
self
,
dataset_ref
):
...
...
@@ -48,17 +49,20 @@ class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
return
''
def
_distribution_format
(
self
,
distribution
,
normalize_ckan_format
=
True
):
imt
,
label
=
super
(
ODSHEuropeanDCATAPProfile
,
self
).
_distribution_format
(
distribution
,
normalize_ckan_format
)
imt
,
label
=
super
(
ODSHEuropeanDCATAPProfile
,
self
).
_distribution_format
(
distribution
,
normalize_ckan_format
)
if
label
in
resource_formats_import
():
label
=
resource_formats_import
()[
label
]
return
imt
,
label
def
graph_from_dataset
(
self
,
dataset_dict
,
dataset_ref
):
super
(
ODSHEuropeanDCATAPProfile
,
self
).
graph_from_dataset
(
dataset_dict
,
dataset_ref
)
super
(
ODSHEuropeanDCATAPProfile
,
self
).
graph_from_dataset
(
dataset_dict
,
dataset_ref
)
for
s
,
p
,
o
in
self
.
g
.
triples
((
None
,
rdflib
.
RDF
.
type
,
DCAT
.
Distribution
)):
for
s2
,
p2
,
o2
in
self
.
g
.
triples
((
s
,
DCT
[
'
format
'
],
None
)):
if
o2
.
decode
()
in
resource_formats_export
():
self
.
g
.
set
((
s
,
DCT
[
'
format
'
],
rdflib
.
URIRef
(
resource_formats_export
()[
o2
.
decode
()])))
self
.
g
.
set
((
s
,
DCT
[
'
format
'
],
rdflib
.
URIRef
(
resource_formats_export
()[
o2
.
decode
()])))
for
s
,
p
,
o
in
self
.
g
.
triples
((
None
,
DCT
.
language
,
None
)):
if
o
.
decode
()
in
get_language
():
self
.
g
.
set
((
s
,
p
,
rdflib
.
URIRef
(
get_language
()[
o
.
decode
()])))
...
...
@@ -74,29 +78,49 @@ class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
class
ODSHDCATdeProfile
(
DCATdeProfile
):
def
parse_dataset
(
self
,
dataset_dict
,
dataset_ref
):
dataset_dict
=
super
(
ODSHDCATdeProfile
,
self
).
parse_dataset
(
dataset_dict
,
dataset_ref
)
dataset_dict
=
super
(
ODSHDCATdeProfile
,
self
).
parse_dataset
(
dataset_dict
,
dataset_ref
)
# Enhance Distributions
for
distribution
in
self
.
g
.
objects
(
dataset_ref
,
DCAT
.
distribution
):
for
resource_dict
in
dataset_dict
.
get
(
'
resources
'
,
[]):
# Match distribution in graph and distribution in ckan-dict
if
unicode
(
distribution
)
==
resource_uri
(
resource_dict
):
for
namespace
in
[
DCATDE
,
DCATDE_1_0
]:
value
=
self
.
_object_value
(
distribution
,
namespace
.
licenseAttributionByText
)
value
=
self
.
_object_value
(
distribution
,
namespace
.
licenseAttributionByText
)
if
value
:
ds_utils
.
insert_new_extras_field
(
dataset_dict
,
'
licenseAttributionByText
'
,
value
)
ds_utils
.
insert_new_extras_field
(
dataset_dict
,
'
licenseAttributionByText
'
,
value
)
return
dataset_dict
return
dataset_dict
def
graph_from_dataset
(
self
,
dataset_dict
,
dataset_ref
):
super
(
ODSHDCATdeProfile
,
self
).
graph_from_dataset
(
dataset_dict
,
dataset_ref
)
super
(
ODSHDCATdeProfile
,
self
).
graph_from_dataset
(
dataset_dict
,
dataset_ref
)
# Enhance Distributions
# <dcatde:contributorID rdf:resource="http://dcat-ap.de/def/contributors/schleswigHolstein"/>
self
.
g
.
add
((
dataset_ref
,
DCATDE
.
contributorID
,
rdflib
.
URIRef
(
"
http://dcat-ap.de/def/contributors/schleswigHolstein
"
)))
self
.
g
.
add
((
dataset_ref
,
DCATDE
.
contributorID
,
rdflib
.
URIRef
(
"
http://dcat-ap.de/def/contributors/schleswigHolstein
"
)))
extras
=
dataset_dict
.
get
(
'
extras
'
,
None
)
if
extras
:
attr
=
None
for
d
in
extras
:
if
d
[
'
key
'
]
==
'
licenseAttributionByText
'
:
attr
=
d
[
'
value
'
]
break
if
attr
:
self
.
g
.
set
(
(
dataset_ref
,
DCATDE
.
licenseAttributionByText
,
rdflib
.
Literal
(
attr
)))
for
dist
in
self
.
g
.
objects
(
dataset_ref
,
DCAT
.
distribution
):
self
.
g
.
set
(
(
dist
,
DCATDE
.
licenseAttributionByText
,
rdflib
.
Literal
(
attr
)))
_RESOURCE_FORMATS_IMPORT
=
None
_RESOURCE_FORMATS_EXPORT
=
None
def
resource_formats
():
global
_RESOURCE_FORMATS_IMPORT
global
_RESOURCE_FORMATS_EXPORT
...
...
@@ -105,7 +129,8 @@ def resource_formats():
g
=
rdflib
.
Graph
()
# Something went wrong with trying to get the file formats online, try to use backup instead
try
:
fallback_filepath
=
config
.
get
(
'
ckan.odsh.resource_formats_fallback_filepath
'
)
fallback_filepath
=
config
.
get
(
'
ckan.odsh.resource_formats_fallback_filepath
'
)
g
.
parse
(
fallback_filepath
)
assert
len
(
set
([
s
for
s
in
g
.
subjects
()]))
>
120
except
:
...
...
@@ -118,12 +143,14 @@ def resource_formats():
_RESOURCE_FORMATS_EXPORT
[
elem
.
split
(
'
/
'
)[
-
1
]]
=
elem
_RESOURCE_FORMATS_IMPORT
[
elem
]
=
elem
.
split
(
'
/
'
)[
-
1
]
def
resource_formats_export
():
global
_RESOURCE_FORMATS_EXPORT
if
not
_RESOURCE_FORMATS_EXPORT
:
resource_formats
()
return
_RESOURCE_FORMATS_EXPORT
def
resource_formats_import
():
global
_RESOURCE_FORMATS_IMPORT
if
not
_RESOURCE_FORMATS_IMPORT
:
...
...
@@ -133,6 +160,7 @@ def resource_formats_import():
_LANGUAGES
=
None
def
get_language
():
'''
When datasets are exported in rdf-format, their language-tag
should be given as
...
...
@@ -149,7 +177,8 @@ def get_language():
_LANGUAGES
=
{}
languages_file_path
=
config
.
get
(
'
ckanext.odsh.language.mapping
'
)
if
not
languages_file_path
:
log
.
warning
(
"
Could not find config setting:
'
ckanext.odsh.language.mapping
'
, using fallback instead.
"
)
log
.
warning
(
"
Could not find config setting:
'
ckanext.odsh.language.mapping
'
, using fallback instead.
"
)
languages_file_path
=
'
/usr/lib/ckan/default/src/ckanext-odsh/languages.json
'
with
open
(
languages_file_path
)
as
languages_file
:
try
:
...
...
This diff is collapsed.
Click to expand it.
ckanext/odsh/tests/harvest_sever_mock.py
+
7
−
2
View file @
6d51b7e4
...
...
@@ -17,12 +17,15 @@ hostPort = 5002
# TODO: better was to set data on RequestHandler
data
=
""
class
RequestHandler
(
BaseHTTPRequestHandler
):
# GET
def
do_GET
(
self
):
self
.
send_response
(
requests
.
codes
.
ok
)
self
.
send_header
(
'
Content-Type
'
,
'
application/json; charset=utf-8
'
)
# self.send_header('Content-Type', 'application/json; charset=utf-8')
self
.
send_header
(
'
Content-Type
'
,
'
application/rdf+xml; charset=utf-8
'
)
self
.
end_headers
()
self
.
wfile
.
write
(
data
.
encode
(
"
utf-8
"
))
...
...
@@ -46,7 +49,8 @@ class HarvestServerMock(threading.Thread):
self
.
_stop_event
=
threading
.
Event
()
self
.
thread_name
=
self
.
__class__
self
.
server
=
HTTPServer
((
hostName
,
hostPort
),
RequestHandler
)
threading
.
Thread
.
__init__
(
self
,
name
=
self
.
thread_name
,
target
=
self
.
server
.
serve_forever
)
threading
.
Thread
.
__init__
(
self
,
name
=
self
.
thread_name
,
target
=
self
.
server
.
serve_forever
)
self
.
setDaemon
(
True
)
...
...
@@ -60,6 +64,7 @@ class HarvestServerMock(threading.Thread):
# except KeyboardInterrupt:
# pass
def
close
(
self
):
self
.
server
.
server_close
()
# print(time.asctime(), "Server Stops - %s:%s" % (hostName, hostPort))
This diff is collapsed.
Click to expand it.
ckanext/odsh/tests/test_rdfexport.py
+
38
−
2
View file @
6d51b7e4
...
...
@@ -9,6 +9,8 @@ import urllib2
import
ckan.tests.helpers
as
helpers
from
ckan.common
import
config
import
ckan.config.middleware
from
ckanext.dcatde.profiles
import
DCATDE
,
DCAT
,
DCATDE_1_0
import
pdb
# run with nosetests --ckan --nologcapture --with-pylons=<config to test> ckanext/odsh/tests/test_routes.py
...
...
@@ -44,9 +46,10 @@ class TestRDFExport:
issued
=
'
27-01-2000
'
,
extras
=
extras
,
owner_org
=
'
test
'
,
license_id
=
"
http://dcat-ap.de/def/licenses/dl-by-de/2.0
"
)
license_id
=
"
http://dcat-ap.de/def/licenses/dl-by-de/2.0
"
,
licenseAttributionByText
=
'
foo
'
)
factories
.
Resource
(
package_id
=
dataset
[
'
id
'
],
license
=
dataset
[
'
license_id
'
])
package_id
=
dataset
[
'
id
'
],
license
=
dataset
[
'
license_id
'
]
,
licenseAttributionByText
=
'
foo
'
)
factories
.
Resource
(
package_id
=
dataset
[
'
id
'
])
...
...
@@ -54,9 +57,25 @@ class TestRDFExport:
response
=
self
.
_get_app
().
get
(
'
/dataset/
'
+
dataset
[
'
name
'
]
+
'
.rdf
'
)
g
.
parse
(
data
=
response
.
body
)
lic
=
self
.
_extract_licenses
(
g
)
att
=
self
.
_extract_licenseAttributions
(
g
)
assert
len
(
lic
)
==
3
assert
len
(
att
)
==
3
assert
len
(
set
([
str
(
l
)
for
l
in
lic
]))
==
1
assert
len
(
set
([
str
(
a
)
for
a
in
att
]))
==
1
assert
str
(
att
[
0
])
==
'
text
'
def
test_catalog
(
self
):
g
=
rdflib
.
Graph
()
response
=
self
.
_get_app
().
get
(
'
/catalog.xml
'
)
g
.
parse
(
data
=
response
.
body
)
datasets
=
list
(
g
.
subjects
(
RDF
.
type
,
DCAT
.
Dataset
))
response
=
self
.
_get_app
().
get
(
'
/api/3/action/package_search
'
)
plist
=
json
.
loads
(
response
.
body
)
assert
len
(
datasets
)
==
plist
[
'
result
'
][
'
count
'
]
def
_get_app
(
self
):
if
not
hasattr
(
self
,
'
app
'
):
...
...
@@ -78,3 +97,20 @@ class TestRDFExport:
ret
.
append
(
l
)
return
ret
def
_extract_licenseAttributions
(
self
,
g
):
datasets
=
list
(
g
.
subjects
(
RDF
.
type
,
DCAT
.
Dataset
))
assert
len
(
datasets
)
==
1
dataset
=
datasets
[
0
]
ret
=
[]
for
namespace
in
[
DCATDE
,
DCATDE_1_0
]:
ret
+=
list
(
g
.
objects
(
dataset
,
namespace
.
licenseAttributionByText
))
distributions
=
list
(
g
.
objects
(
dataset
,
DCAT
.
distribution
))
for
d
in
distributions
:
for
namespace
in
[
DCATDE
,
DCATDE_1_0
]:
ret
+=
list
(
g
.
objects
(
d
,
namespace
.
licenseAttributionByText
))
return
ret
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment