import logging
import traceback
import ast
import json
import re
import csv
import datetime
import hashlib
import urllib.request

import pkg_resources
from dateutil import parser
from urllib.parse import urlsplit, urlunsplit

import ckan.plugins.toolkit as toolkit
import ckan.model as model
import ckan.lib.helpers as helpers
from ckan.common import c, config, request
from ckan.logic import get_action

from collections import OrderedDict

CKAN_TYPES = {'http://dcat-ap.de/def/datasetTypes/collection': 'collection'}

log = logging.getLogger(__name__)


def odsh_openness_score_dataset_html(dataset):
    score = -1
    resources = dataset.get('resources')
    for resource in resources:
        r_qa = resource.get('qa')
        if r_qa:
            try:
                # r_qa might be a string representation of a dictionary
                # when 'dataset' is sent from solr
                if isinstance(r_qa, str):
                    qa = ast.literal_eval(r_qa)
                else:
                    qa = r_qa
                resource_score = qa.get('openness_score')
                if resource_score is not None and resource_score > score:
                    score = resource_score
            except Exception as e:
                log.error('Error while calculating openness score %s: %s\nException: %s',
                          e.__class__.__name__, str(e), traceback.format_exc())
    return score
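
# Illustrative usage (hypothetical values, not from the test suite):
#   odsh_openness_score_dataset_html(
#       {'resources': [{'qa': "{'openness_score': 3}"}]})
#   -> 3   # the string coming from solr is parsed via ast.literal_eval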


def odsh_get_resource_details(resource_id):
    resource_details = toolkit.get_action('resource_show')(
        data_dict={'id': resource_id})
    return resource_details


# (function name assumed from the body: returns the view list for a resource)
def odsh_get_resource_views(resource):
    context = {'model': model, 'session': model.Session,
               'user': c.user, 'for_view': True,
               'auth_user_obj': c.userobj}
    return get_action('resource_view_list')(
        context, {'id': resource['id']})


def odsh_get_bounding_box(pkg_dict):
    try:
        extras = pkg_dict.get('extras')
        spatial = None
        for f in extras:
            if 'key' in f and f['key'] == 'spatial':
                spatial = f['value']
                break
        if spatial is not None:
            d = json.loads(spatial)
            if 'coordinates' in d:
                coords = d['coordinates']
                return compute_bounding_box(coords)
    except Exception as e:
        log.error('Error while computing bounding box %s: %s\nException: %s',
                  e.__class__.__name__, str(e), traceback.format_exc())
    return None


def compute_bounding_box(coords):
    if type(coords[0]) != list:
        # a single point: [x, y]
        return [coords[0], coords[0], coords[1], coords[1]]

    # flatten the polygon rings into a list of [x, y] pairs
    coords = [c for sublist in coords for c in sublist]
    if type(coords[0][0]) == list:
        # multipolygon: flatten one level further
        coords = [c for sublist in coords for c in sublist]

    minx = min(coords, key=lambda t: t[0])[0]
    maxx = max(coords, key=lambda t: t[0])[0]
    miny = min(coords, key=lambda t: t[1])[1]
    maxy = max(coords, key=lambda t: t[1])[1]

    return [maxx, minx, maxy, miny]
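
# Illustrative usage (hypothetical coordinates): for a single polygon ring
#   compute_bounding_box([[[9.0, 54.0], [10.0, 54.5], [9.5, 55.0]]])
# flattens once to three [x, y] pairs and returns
#   [10.0, 9.0, 55.0, 54.0]   # [maxx, minx, maxy, miny]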


def odsh_get_spatial_text(pkg_dict):
    extras = pkg_dict.get('extras')
    spatial = None
    for f in extras:
        if 'key' in f and f['key'] == 'spatial_text':
            spatial = f['value']
            break
    return spatial


def extend_search_convert_local_to_utc_timestamp(str_timestamp):
    if not str_timestamp:
        return None
    if not re.match(r'\d\d\d\d-\d\d-\d\d', str_timestamp):
        return None
    dt = parser.parse(str_timestamp, dayfirst=False).isoformat()
    return dt + "Z"
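
# Illustrative usage (hypothetical value):
#   extend_search_convert_local_to_utc_timestamp('2024-06-10')
#   -> '2024-06-10T00:00:00Z'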


def odsh_render_datetime(datetime_, fromIso=True):
    # output format dd.mm.yyyy (assumed; the original definition of
    # date_format was lost, only its use below survived)
    date_format = '{0.day:02d}.{0.month:02d}.{0.year:04d}'
    if not datetime_:
        return ''
    if not re.match(r'\d\d\d\d-\d\d-\d\d', datetime_):
        return ''
    if fromIso:
        DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
    else:
        DATETIME_FORMAT = '%Y-%m-%d'
    try:
        dt = datetime.datetime.strptime(datetime_, DATETIME_FORMAT)
    except ValueError:
        dt = parser.parse(datetime_, dayfirst=False)
    return date_format.format(dt)
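
# Illustrative usage (hypothetical value):
#   odsh_render_datetime('2024-06-10T12:30:00')
#   -> '10.06.2024'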


def odsh_resource_formats(as_simple_list=False):
    """
    Retrieves a list of file formats from an RDF file using string
    manipulation and sorts them alphabetically.

    Returns:
        A list of dictionaries containing key-value pairs for each file format.
    """
    # Path to the RDF file with resource formats
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    filepath = config.get(
        'ckanext.odsh.resource_formats_fallback_filepath',
        extension_path + '/resources/fileformats.rdf')

    # Read the contents of the RDF file
    with open(filepath, 'r', encoding='utf-8') as file:
        rdf_content = file.read()

    # Regular expression pattern to extract the file format values
    pattern = r'<rdf:Description rdf:about="http:\/\/publications.europa.eu\/resource\/authority\/file-type\/(\w+)">'

    # Find all matches of the pattern in the RDF content
    file_formats = []
    matches = re.findall(pattern, rdf_content)

    # Iterate over the matches and add key-value pairs to the list
    for match in matches:
        file_formats.append({'key': match, 'value': match})

    # Sort the list alphabetically based on the file format values
    file_formats = sorted(file_formats, key=lambda x: x['value'])

    if as_simple_list:
        file_formats = [obj['value'] for obj in file_formats]

    return file_formats
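
# Illustrative usage (actual entries depend on the RDF file shipped with the
# extension):
#   odsh_resource_formats(as_simple_list=True)
#   -> ['CSV', 'GEOJSON', 'JSON', 'PDF', ...]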


def odsh_create_checksum(in_string):
    hashstring = hashlib.md5(in_string.encode('utf-8')).hexdigest()
    return int(hashstring, base=16)
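
# Illustrative usage: the checksum is deterministic, so equal inputs map to
# equal integers (the digest below is the well-known MD5 of the empty string):
#   odsh_create_checksum('')
#   -> int('d41d8cd98f00b204e9800998ecf8427e', 16)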


def odsh_extract_error(key, errors, field='extras'):
    if not errors or not (field in errors):
        return None
    ext = errors[field]
    for item in ext:
        if 'key' in item:
            for error in item['key']:
                if error.startswith(key):
                    return error
    return None


def odsh_extract_error_new(key, errors):
    if not errors or not ('__extras' in errors):
        return None
    error = errors['__extras'][0].get(key, None)
    return error


def odsh_extract_value_from_extras(extras, key):
    if not extras:
        return None
    for item in extras:
        if 'key' in item and item['key'].lower() == key.lower():
            if 'value' in item:
                return item['value']
    return None
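
# Illustrative usage (hypothetical extras list):
#   odsh_extract_value_from_extras(
#       [{'key': 'issued', 'value': '2024-01-01'}], 'Issued')
#   -> '2024-01-01'   # the key comparison is case-insensitive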


# (def line was lost; name and signature assumed, mirroring CKAN core's
# license_options helper)
def odsh_license_options(existing_license_id=None):
    '''Returns [(l.title, l.id), ...] for the licenses configured to be
    offered. Always includes the existing_license_id, if supplied.
    '''
    register = model.Package.get_license_register()
    licenses = register.values()
    license_ids = [license.id for license in licenses]
    if existing_license_id and existing_license_id not in license_ids:
        license_ids.insert(0, existing_license_id)
    return [
        (license_id,
         register[license_id].title if license_id in register else license_id)
        for license_id in license_ids]


def odsh_has_more_facets(facet, limit=None, exclude_active=False):
    facets = []
    for facet_item in c.search_facets.get(facet)['items']:
        if not len(facet_item['name'].strip()) or facet_item['count'] == 0:
            continue
        if not (facet, facet_item['name']) in list(request.args.items()):
            facets.append(dict(active=False, **facet_item))
        elif not exclude_active:
            facets.append(dict(active=True, **facet_item))
    if c.search_facets_limits and limit is None:
        limit = c.search_facets_limits.get(facet)
    if limit is not None and len(facets) > limit:
        return True
    return False


def odsh_public_url():
    return config.get('ckanext.odsh.public_url')


# (def line was lost; name assumed from the body: loads the list of spatial
# names from the tab-separated mapping file)
def odsh_spatial_extends_available():
    try:
        extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
        file_path = config.get('ckanext.odsh.spatial.mapping',
                               extension_path + '/resources/schleswig-holstein_geojson.csv')
        mapping_file = urllib.request.urlopen(file_path)
    except Exception:
        raise Exception("Could not load spatial mapping file!")

    # urlopen yields bytes; decode each line before parsing it as CSV
    cr = csv.reader((line.decode('utf-8') for line in mapping_file),
                    delimiter="\t")
    result = []
    for row in cr:
        spatial_text = row[1]
        result.append(spatial_text)
    return result


def odsh_public_resource_url(res):
    if res.get('url_type', None) == 'upload' and 'url' in res:
        # strip scheme and host so uploads are served relative to this portal
        f = urlsplit(res['url'])
        return urlunsplit(('', '', f[2], f[3], f[4]))
    return res.get('url')


# (def line was lost; name assumed from the body: feature flag for the
# test banner)
def odsh_show_testbanner():
    return config.get('ckanext.odsh.showtestbanner', False) == True


def odsh_get_facet_items_dict(name, search_facets, limit=None):
    '''
    Gets all facets like 'get_facet_items_dict' but sorted alphabetically
    instead of by count.
    '''
    if name == 'groups':
        limit = 20
    facets = helpers.get_facet_items_dict(name, search_facets, limit)
    facets.sort(key=lambda it: (it['display_name'].lower(), -it['count']))
    return facets
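
# Illustrative ordering (hypothetical facet items): with display names
# 'Verkehr' (3 hits) and 'Bildung' (7 hits), the alphabetical sort returns
# 'Bildung' before 'Verkehr', regardless of the counts.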


def odsh_main_groups():
    '''Return a list of the groups to be shown on the start page.'''
    # Get a list of all the site's groups from CKAN, sorted by number of
    # datasets.
    groups = toolkit.get_action('group_list')(
        data_dict={'all_fields': True})
    return groups


def odsh_now():
    return helpers.render_datetime(datetime.datetime.now(), "%Y-%m-%d")


def odsh_group_id_selected(selected, group_id):
    if type(selected) is not list:
        selected = [selected]
    for g in selected:
        if (isinstance(g, str) and group_id == g) \
                or (type(g) is dict and group_id == g['id']):
            return True
    return False

# def odsh_remove_route(map, routename):
# route = None
# for i, r in enumerate(map.matchlist):
# if r.name == routename:
# route = r
# break
# if route is not None:
# map.matchlist.remove(route)
# for key in map.maxkeys:
# if key == route.maxkeys:
# map.maxkeys.pop(key)
# map._routenames.pop(route.name)
# break


def is_within_last_month(date, date_ref=None):
    '''
    date is a datetime.date object containing the date to be checked;
    date_ref is a datetime.date object containing the reference date.
    If date_ref is not specified, today's date is used.

    This method is needed by the method OdshPlugin.before_view in plugin.py.
    '''
    if not date_ref:
        date_ref = datetime.date.today()
    [year_ref, month_ref, day_ref] = [date_ref.year, date_ref.month, date_ref.day]
    try:
        if month_ref > 1:
            one_month_ago = datetime.date(year_ref, month_ref - 1, day_ref)
        else:
            one_month_ago = datetime.date(year_ref - 1, 12, day_ref)
    except ValueError:
        # this happens if the month before month_ref has fewer days than month_ref
        one_month_ago = datetime.date(year_ref, month_ref, 1) - datetime.timedelta(days=1)
    return date > one_month_ago
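
# Illustrative usage (hypothetical dates): one month before 2024-06-10 is
# 2024-05-10, so a date after that cut-off is within the last month:
#   is_within_last_month(datetime.date(2024, 5, 20),
#                        date_ref=datetime.date(2024, 6, 10))
#   -> True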


def get_all_datasets_belonging_to_collection(context, collection_name):
    rel_collection_dict = dict({"id": collection_name})
    name_list = list()
    try:
        list_rel_collection = get_action('package_relationships_list')(context, rel_collection_dict)
    except AssertionError:
        # if no relationship exists, return an empty list
        return name_list
    for item in list_rel_collection:
        item_object = item.get('object')
        name_list.append(item_object)
    return name_list


def get_all_datasets_belonging_to_collection_by_dataset(context, dataset_name):
    collection_name = get_collection_name_by_dataset(context, dataset_name)
    name_list = get_all_datasets_belonging_to_collection(context, collection_name)
    return name_list


def get_collection_name_by_dataset(context, dataset_name):
    rel_dataset_dict = dict({"id": dataset_name})
    list_rel_dataset = toolkit.get_action('package_relationships_list')(context, rel_dataset_dict)
    if not len(list_rel_dataset):
        return None
    collection_name = list_rel_dataset[0]['object']
    return collection_name


def get_successor_and_predecessor_dataset(context, pkg_dict):
    dataset_name = pkg_dict.get('name')
    siblings_dicts_with_access = _get_siblings_dicts_with_access(context, pkg_dict)
    if siblings_dicts_with_access:
        n_siblings = len(siblings_dicts_with_access)
        siblings_dicts_sorted_by_date_issued = _sort_siblings_by_name_and_date(siblings_dicts_with_access)
        siblings_names_sorted_by_date_issued = [d['name'] for d in siblings_dicts_sorted_by_date_issued]
        id_current_dataset = siblings_names_sorted_by_date_issued.index(dataset_name)
        predecessor_name = (
            siblings_names_sorted_by_date_issued[id_current_dataset - 1] if (id_current_dataset > 0)
            else None
        )
        successor_name = (
            siblings_names_sorted_by_date_issued[id_current_dataset + 1] if (id_current_dataset < n_siblings - 1)
            else None
        )
    else:
        predecessor_name, successor_name = None, None
    return successor_name, predecessor_name


def _get_siblings_dicts_with_access(context, pkg_dict):
    dataset_name = pkg_dict.get('name')
    list_of_siblings = get_all_datasets_belonging_to_collection_by_dataset(context, dataset_name)
    n_siblings = len(list_of_siblings)
    if n_siblings > 0:
        siblings_dicts = [get_package_dict(name) for name in list_of_siblings]
        user_has_access = lambda pkg_dict: helpers.check_access('package_show', pkg_dict)
        siblings_dicts_with_access = list(filter(user_has_access, siblings_dicts))
        return siblings_dicts_with_access
    return None


def _sort_siblings_by_name_and_date(siblings_dicts):
    '''
    sort by name first and then by date to have a fallback if dates are the same
    '''
    _get_name = lambda pkg_dict: pkg_dict.get('name')
    # fall back to '' so datasets without an 'issued' extra still sort
    _get_issued = lambda pkg_dict: odsh_extract_value_from_extras(pkg_dict.get('extras'), 'issued') or ''
    siblings_dicts_sorted_by_name = sorted(siblings_dicts, key=_get_name)
    siblings_dicts_sorted_by_date_issued = sorted(siblings_dicts_sorted_by_name, key=_get_issued)
    return siblings_dicts_sorted_by_date_issued
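
# Python's sort is stable, so sorting by name first and by 'issued' second
# yields a list ordered by issue date with ties broken alphabetically.
# Illustrative (hypothetical values): two siblings both issued in '2024',
# named 'b' and 'a', come back as 'a' before 'b', and both come after any
# sibling issued in '2023'.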


# NOTE: shadowed by the get_package_dict defined further below, which uses
# the package_show action instead of the model
def get_package_dict(name):
    return model.Package.get(name).as_dict()


def get_successor_and_predecessor_urls(context, pkg_dict):
    successor_name, predecessor_name = get_successor_and_predecessor_dataset(context, pkg_dict)
    # (the URL expression was lost; dataset.read assumed, matching its use
    # in get_body_mail below)
    successor_url, predecessor_url = (
        helpers.url_for('dataset.read', id=name)
        if name is not None
        else None
        for name in (successor_name, predecessor_name)
    )
    return successor_url, predecessor_url


def short_name_for_category(category_name):
    translations = {
        'soci': 'Bevölkerung',
        'educ': 'Bildung',
        'ener': 'Energie',
        'heal': 'Gesundheit',
        'intr': 'Internationales',
        'just': 'Justiz',
        'agri': 'Landwirtschaft',
        'gove': 'Regierung',
        'regi': 'Regionales',
        'envi': 'Umwelt',
        'tran': 'Verkehr',
        'econ': 'Wirtschaft',
        'tech': 'Wissenschaft',
    }
    return translations.get(category_name)


def odsh_load_mdk_sample_dataset():
    '''
    Load sample dataset (Musterkatalog/Musterdatensatz).
    See https://bertelsmannstift.github.io/Musterdatenkatalog/def/musterdatensatz.rdf
    and the corresponding mapping in the mdk_mapping.json file.
    '''
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    default_sample_data_file_path = extension_path + '/resources/mdk_mapping.json'
    sample_data_file_path = config.get(
        'ckanext.odsh.sample_data_file_path', default_sample_data_file_path)
    try:
        with open(sample_data_file_path) as mapping_json:
            MDK_MAPPING = json.loads(mapping_json.read(), object_pairs_hook=OrderedDict)
            default = [{'value': 'Musterdatensatz wählen..', 'key': ''}]
            mdk = [{'key': key, 'value': MDK_MAPPING[key]} for key in MDK_MAPPING]
            result = default + mdk
    except IOError:
        log.error(
            'Could not load sample dataset mapping file from {}'
            .format(sample_data_file_path)
        )
        raise
    except ValueError:
        log.error(
            'Could not convert sample dataset mapping file from json. \nSample dataset mapping file: {}'
            .format(sample_data_file_path)
        )
        raise
    return result
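
# Illustrative result shape (entries depend on mdk_mapping.json): the
# select-box placeholder comes first, followed by one entry per mapping key:
#   [{'value': 'Musterdatensatz wählen..', 'key': ''},
#    {'key': '<uri>', 'value': '<label>'}, ...]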


def odsh_load_raw_mdk_sample_dataset():
    '''
    Load sample dataset (Musterkatalog/Musterdatensatz).
    See https://bertelsmannstift.github.io/Musterdatenkatalog/def/musterdatensatz.rdf
    and the corresponding mapping in the mdk_mapping.json file.
    '''
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    default_sample_data_file_path = extension_path + '/resources/mdk_mapping.json'
    sample_data_file_path = config.get(
        'ckanext.odsh.sample_data_file_path', default_sample_data_file_path)
    try:
        with open(sample_data_file_path) as mapping_json:
            result = json.loads(mapping_json.read(), object_pairs_hook=OrderedDict)
    except IOError:
        log.error(
            'Could not load sample dataset mapping file from {}'
            .format(sample_data_file_path)
        )
        raise
    except ValueError:
        log.error(
            'Could not convert sample dataset mapping file from json. \nSample dataset mapping file: {}'
            .format(sample_data_file_path)
        )
        raise
    return result


def map_dct_type_to_ckan_type(dct_type):
    '''
    Maps the field dct:type from a harvested RDF file
    to the corresponding CKAN package type.
    '''
    ckan_type = CKAN_TYPES.get(dct_type)
    return ckan_type


def map_ckan_type_to_dct_type(ckan_type):
    DCT_TYPES = _revert_dict(CKAN_TYPES)
    dct_type = DCT_TYPES.get(ckan_type)
    return dct_type


def _revert_dict(d):
    d_inverse = {v: k for k, v in d.items()}
    return d_inverse
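
# Illustrative round trip using the CKAN_TYPES mapping defined above:
#   map_dct_type_to_ckan_type('http://dcat-ap.de/def/datasetTypes/collection')
#   -> 'collection'
#   map_ckan_type_to_dct_type('collection')
#   -> 'http://dcat-ap.de/def/datasetTypes/collection'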


def add_pkg_to_collection(id_pkg, id_collection):
    if id_pkg and id_collection:
        relationship_dict = {
            'subject': id_pkg,
            'object': id_collection,
            'type': 'child_of',
        }
        toolkit.get_action('package_relationship_create')(None, relationship_dict)


def correct_missing_relationship(pkg_dict, pkg_relationships_from_model):
    '''
    This function corrects a missing relationship in package_show.
    Note that this fix is only good for one or no relationship.
    The error is well known but has not been fixed upstream:
    https://github.com/ckan/ckan/issues/3114
    It causes the deletion of relationships, because package_show is
    used in resource_create to get the package.
    '''
    if pkg_relationships_from_model:
        relationship_from_model = pkg_relationships_from_model[0]
        relationship_list_from_dict = pkg_dict.get('relationships_as_subject')
        type_pkg = pkg_dict.get('type')
        needs_update = type_pkg == 'dataset' and not relationship_list_from_dict
        if needs_update:
            relationship_for_package = {
                '__extras': {
                    'object_package_id': relationship_from_model.object_package_id,
                    'subject_package_id': relationship_from_model.subject_package_id,
                },
                'comment': relationship_from_model.subject_package_id,
                'id': relationship_from_model.id,
                'type': relationship_from_model.type,
            }
            # guard against a missing or None list before appending
            if not pkg_dict.get('relationships_as_subject'):
                pkg_dict['relationships_as_subject'] = []
            pkg_dict['relationships_as_subject'].append(relationship_for_package)
    return pkg_dict


def get_pkg_relationships_from_model(pkg_dict):
    pkg_id = pkg_dict.get('id')
    return model.Package.get(pkg_id).get_relationships()


def load_language_mapping():
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    file_path = config.get('ckanext.odsh.language_mapping',
                           extension_path + '/resources/language_mapping.json')
    with open(file_path) as language_mapping_json:
        LANGUAGE_MAPPING = json.loads(language_mapping_json.read())
    return LANGUAGE_MAPPING


def load_json_to_ordered_dict(json_str):
    return json.loads(json_str, object_pairs_hook=OrderedDict)


def get_language_of_package(pkg_dict):
    LANGUAGE_MAPPING = load_language_mapping()
    language_id = _get_language_id(pkg_dict)
    if not language_id:
        return None
    language = LANGUAGE_MAPPING.get(language_id)
    return language


def get_language_icon(pkg_dict):
    ICONS = {
        "http://publications.europa.eu/resource/authority/language/DAN": '/base/images/icon_lang_danish.png',
        "http://publications.europa.eu/resource/authority/language/ENG": '/base/images/icon_lang_english.png',
    }
    language_id = _get_language_id(pkg_dict)
    if not language_id:
        return None
    return ICONS.get(language_id)


def _get_language_id(pkg_dict):
    # prefer the top-level field, then fall back to the extras list
    language_id = pkg_dict.get('language')
    if not language_id:
        language_id = odsh_extract_value_from_extras(
            pkg_dict.get('extras'), 'language'
        )
    if not language_id:
        return None
    language_id_cleaned = re.sub(r'[\[\]"]', '', language_id)
    return language_id_cleaned


def get_spatial_for_selection():
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    file_path = config.get('ckanext.odsh.spatial.mapping',
                           extension_path + '/resources/schleswig-holstein_geojson.csv')
    with open(file_path, newline='') as mapping_file:
        cr = csv.reader(mapping_file, delimiter="\t")
        spatial_mapping = list(cr)
    unique_mapping = []
    seen_values = set()
    for key, value, _ in spatial_mapping:
        if value in seen_values:
            continue  # Skip if the value has already been seen
        if "municipalityKey" in key:
            unique_mapping.append({'key': key, 'value': value})
        else:
            # Check if there is a municipality key entry for the value
            municipality_entry = next(
                (entry for entry in spatial_mapping if entry[1] == value and "municipalityKey" in entry[0]),
                None
            )
            if municipality_entry:
                # If a municipality key entry exists, use it instead of the current key
                unique_mapping.append({'key': municipality_entry[0], 'value': value})
            else:
                # Otherwise, use the current key
                unique_mapping.append({'key': key, 'value': value})
        seen_values.add(value)
    unique_mapping.append({'key': '', 'value': ''})
    return unique_mapping
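
# Illustrative dedup behaviour (hypothetical rows from the mapping file):
#   [['municipalityKey:01001', 'Flensburg', '...'],
#    ['regionKey:0100',        'Flensburg', '...']]
# yields a single entry per value, preferring the municipality key:
#   [{'key': 'municipalityKey:01001', 'value': 'Flensburg'}, ...]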


def get_language_for_selection():
    LANGUAGE_MAPPING = load_language_mapping()
    dict_for_select_box = [{'key': key, 'value': LANGUAGE_MAPPING[key]} for key in LANGUAGE_MAPPING]
    return dict_for_select_box


def get_package_dict(name):
    '''
    raises ckan.logic.NotFound if not found
    '''
    package_dict = toolkit.get_action('package_show')(None, {'id': name})
    return package_dict


def size_of_fmt(num, suffix='B'):
    for unit in ['', ' k', ' M', ' G', ' T', ' P', ' E', ' Z']:
        if abs(num) < 1000.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1000.0
    return "%.1f%s%s" % (num, 'Y', suffix)


def get_resource_size(resource):
    resource_size = resource.get('size')
    if resource_size:
        return size_of_fmt(resource_size)


def get_address_org(organization):
    list_extras = organization.get('extras')
    address = dict()
    if not list_extras:
        return address
    for extra in list_extras:
        address.update({extra.get('key'): extra.get('value')})
    web = address.get('web')
    if web and not web.startswith('http'):
        web = 'http://' + web
        address.update({'web': web})
    return address


def get_body_mail(organization, package):
    package_name = package.get('name')
    url = helpers.url_for('dataset.read', id=package_name, qualified=True)
    title = package.get('title')
    # %0D%0A is a URL-encoded line break for use in a mailto: body
    anrede = "Sehr geehrte Damen und Herren," + "%0D%0A" + "%0D%0A" + "zu folgendem Eintrag habe ich eine Anmerkung/Frage:" + "%0D%0A" + "%0D%0A"
    mail_titel = "Titel: " + title + "%0D%0A"
    mail_document = "Dokument-ID: " + package_name + "%0D%0A"
    mail_url = "URL: " + url + "%0D%0A" + "%0D%0A"
    message = mail_titel + mail_document + mail_url + "Mein Kommentar:" + "%0D%0A" + "%0D%0A" + "%0D%0A" + "%0D%0A"
    return anrede + message