diff --git a/ckanext/odsh/profiles/odsh_european_dcatap_profile.py b/ckanext/odsh/profiles/odsh_european_dcatap_profile.py
deleted file mode 100644
index 544323bd1d9b32db11f84e78b93f4a5fb364f92d..0000000000000000000000000000000000000000
--- a/ckanext/odsh/profiles/odsh_european_dcatap_profile.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import logging
-import rdflib
-
-from ckan.common import config, json
-from ckan.model.license import LicenseRegister
-from ckanext.dcat.profiles import EuropeanDCATAPProfile, DCT, URIRefOrLiteral
-from ckanext.dcatde.profiles import DCAT
-
-log = logging.getLogger(__name__)
-DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
-DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
-
-
-class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
-
-    def _license(self, dataset_ref):
-        if self._licenceregister_cache is not None:
-            license_uri2id, license_title2id = self._licenceregister_cache
-        else:
-            license_uri2id = {}
-            license_title2id = {}
-            for license_id, license in LicenseRegister().items():
-                license_uri2id[license_id] = license_id
-                license_uri2id[license.url] = license_id
-                license_title2id[license.title] = license_id
-            self._licenceregister_cache = license_uri2id, license_title2id
-
-        for distribution in self._distributions(dataset_ref):
-            # If distribution has a license, attach it to the dataset
-            license = self._object(distribution, DCT.license)
-            if license:
-                # Try to find a matching license comparing URIs, then titles
-                license_id = license_uri2id.get(license.toPython())
-                if not license_id:
-                    license_id = license_title2id.get(
-                        self._object_value(license, DCT.title))
-                if license_id:
-                    return license_id
-        return ''
-
-    def _distribution_format(self, distribution, normalize_ckan_format=True):
-        imt, label = super(ODSHEuropeanDCATAPProfile, self)._distribution_format(
-            distribution, normalize_ckan_format)
-        if label in resource_formats_import():
-            label = resource_formats_import()[label]
-        return imt, label
-
-    def graph_from_dataset(self, dataset_dict, dataset_ref):
-        super(ODSHEuropeanDCATAPProfile, self).graph_from_dataset(
-            dataset_dict, dataset_ref)
-        for s, p, o in self.g.triples((None, rdflib.RDF.type, DCAT.Distribution)):
-            for s2, p2, o2 in self.g.triples((s, DCT['format'], None)):
-                if o2.decode() in resource_formats_export():
-                    self.g.set((s, DCT['format'], rdflib.URIRef(
-                        resource_formats_export()[o2.decode()])))
-        for s, p, o in self.g.triples((None, DCT.language, None)):
-            if o.decode() in get_language():
-                self.g.set((s, p, rdflib.URIRef(get_language()[o.decode()])))
-            elif type(o) == rdflib.Literal and type(URIRefOrLiteral(o.decode())) == rdflib.URIRef:
-                self.g.set((s, p, rdflib.URIRef(o.decode())))
-
-        license = dataset_dict.get('license_id', None)
-        if license:
-            self.g.add((dataset_ref, DCT.license, rdflib.URIRef(license)))
-            for dist in self.g.objects(dataset_ref, DCAT.distribution):
-                self.g.add((dist, DCT.license, rdflib.URIRef(license)))
-
-
-_RESOURCE_FORMATS_IMPORT = None
-_RESOURCE_FORMATS_EXPORT = None
-
-def resource_formats():
-    global _RESOURCE_FORMATS_IMPORT
-    global _RESOURCE_FORMATS_EXPORT
-    _RESOURCE_FORMATS_IMPORT = {}
-    _RESOURCE_FORMATS_EXPORT = {}
-    g = rdflib.Graph()
-    # Something went wrong with trying to get the file formats online, try to use backup instead
-    try:
-        fallback_filepath = config.get(
-            'ckan.odsh.resource_formats_fallback_filepath')
-        g.parse(fallback_filepath)
-        assert len(set([s for s in g.subjects()])) > 120
-    except:
-        log.exception("failed to process resource_formats")
-        raise Exception('failed to load formats')
-    file_types = [subj.decode() for subj in g.subjects()]
-
-    for elem in sorted(set(file_types)):
-        if elem.split('/')[-1] != 'file-type':
-            _RESOURCE_FORMATS_EXPORT[elem.split('/')[-1]] = elem
-            _RESOURCE_FORMATS_IMPORT[elem] = elem.split('/')[-1]
-
-def resource_formats_export():
-    global _RESOURCE_FORMATS_EXPORT
-    if not _RESOURCE_FORMATS_EXPORT:
-        resource_formats()
-    return _RESOURCE_FORMATS_EXPORT
-
-def resource_formats_import():
-    global _RESOURCE_FORMATS_IMPORT
-    if not _RESOURCE_FORMATS_IMPORT:
-        resource_formats()
-    return _RESOURCE_FORMATS_IMPORT
-
-
-_LANGUAGES = None
-
-def get_language():
-    ''' When datasets are exported in rdf-format, their language-tag
-        should be given as
-        "<dct:language rdf:resource="http://publications.europa.eu/.../XXX"/>",
-        where XXX represents the language conforming to iso-639-3 standard.
-        However, some imported datasets represent their language as
-        "<dct:language>de</dct:language>", which will be interpreted here as
-        iso-639-1 values. As we do not display the language setting in the
-        web frontend, this function only assures the correct export format,
-        by using 'languages.json' as mapping table.
-    '''
-    global _LANGUAGES
-    if not _LANGUAGES:
-        _LANGUAGES = {}
-        languages_file_path = config.get('ckanext.odsh.language.mapping')
-        if not languages_file_path:
-            log.warning(
-                "Could not find config setting: 'ckanext.odsh.language.mapping', using fallback instead.")
-            languages_file_path = '/usr/lib/ckan/default/src/ckanext-odsh/languages.json'
-        with open(languages_file_path) as languages_file:
-            try:
-                language_mapping_table = json.loads(languages_file.read())
-            except ValueError, e:
-                # includes simplejson.decoder.JSONDecodeError
-                raise ValueError('Invalid JSON syntax in %s: %s' %
-                                 (languages_file_path, e))
-
-        for language_line in language_mapping_table:
-            _LANGUAGES[language_line[0]] = language_line[1]
-
-    return _LANGUAGES
\ No newline at end of file
diff --git a/out.uls b/out.uls
deleted file mode 100644
index de4db1551cc3abf0e549b6545439211fd1fe70df..0000000000000000000000000000000000000000
--- a/out.uls
+++ /dev/null
@@ -1,30 +0,0 @@
-V;2019-04-25 12:18:36;ckan274;"ULS";"Exception";"traceback";"Text";"
-Traceback (most recent call last):
-File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
-raise BaseException('boom')
-BaseException: boom
-";
-V;2019-04-25 12:18:37;ckan274;"ULS";"Exception";"traceback";"Text";"
-Traceback (most recent call last):
-File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
-raise BaseException('boom')
-BaseException: boom
-";
-V;2019-04-25 12:18:38;ckan274;"ULS";"Exception";"traceback";"Text";"
-Traceback (most recent call last):
-File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
-raise BaseException('boom')
-BaseException: boom
-";
-V;2019-04-25 12:18:40;ckan274;"ULS";"Exception";"traceback";"Text";"
-Traceback (most recent call last):
-File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
-raise BaseException('boom')
-BaseException: boom
-";
-V;2019-04-25 12:18:41;ckan274;"ULS";"Exception";"traceback";"Text";"
-Traceback (most recent call last):
-File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
-raise BaseException('boom')
-BaseException: boom
-";
diff --git a/validation.py b/validation.py
deleted file mode 100644
index e50cbb4394ddbcc447a7fe87d200e738ba6019e6..0000000000000000000000000000000000000000
--- a/validation.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# This Python file uses the following encoding: utf-8
-import logging
-import csv
-import re
-import urllib2
-import json
-from itertools import count
-from dateutil.parser import parse
-
-import ckan.plugins.toolkit as toolkit
-import ckan.model as model
-from ckan.lib.navl.dictization_functions import Missing
-
-from pylons import config
-
-import pdb
-
-_ = toolkit._
-
-log = logging.getLogger(__name__)
-
-
-def _extract_value(data, field):
-    key = None
-    for k in data.keys():
-        if data[k] == field:
-            key = k
-            break
-    if key is None:
-        return None
-    return data[(key[0], key[1], 'value')]
-
-
-def validate_extra_groups(data, requireAtLeastOne, errors):
-    value = _extract_value(data, 'groups')
-    if value != None:
-        # 'value != None' means the extra key 'groups' was found,
-        # so the dataset came from manual editing via the web-frontend.
-        if not value:
-            if requireAtLeastOne:
-                errors['groups'] = 'at least one group needed'
-            data[('groups', 0, 'id')] = ''
-            return
-
-        groups = [g.strip() for g in value.split(',') if value.strip()]
-        for k in data.keys():
-            if len(k) == 3 and k[0] == 'groups':
-                data[k] = ''
-                # del data[k]
-        if len(groups) == 0:
-            if requireAtLeastOne:
-                errors['groups'] = 'at least one group needed'
-            return
-
-        for num, group in zip(range(len(groups)), groups):
-            data[('groups', num, 'id')] = group
-    else: # no extra-field 'groups'
-        # dataset might come from a harvest process
-        if not data.get(('groups', 0, 'id'), False) and \
-           not data.get(('groups', 0, 'name'), False):
-            errors['groups'] = 'at least one group needed'
-
-
-def validate_extras(key, data, errors, context):
-    extra_errors = {}
-    isStaNord = ('id',) in data and data[('id',)][:7] == 'StaNord'
-
-    validate_extra_groups(data, True, extra_errors)
-    validate_extra_date_new(key, 'issued', data, isStaNord, extra_errors)
-    validate_extra_date_new(key, 'temporal_start',
-                            data, isStaNord, extra_errors)
-    validate_extra_date_new(key, 'temporal_end', data, True, extra_errors)
-
-    if len(extra_errors.values()):
-        raise toolkit.Invalid(extra_errors)
-
-
-def _set_value(data, field, value):
-    key = None
-    for k in data.keys():
-        if data[k] == field:
-            key = k
-            break
-    if key is None:
-        return None
-    data[(key[0], key[1], 'value')] = value
-
-
-def validate_extra_date_new(key, field, data, optional, errors):
-    value = _extract_value(data, field)
-
-    if not value:
-        if not optional:
-            errors[field] = 'empty'
-        return
-    else:
-        if re.match(r'\d\d\d\d-\d\d-\d\d', value):
-            try:
-                dt = parse(value)
-                _set_value(data, field, dt.isoformat())
-                return
-            except ValueError:
-                pass
-        errors[field] = 'not a valid date'
-
-
-def validate_licenseAttributionByText(key, data, errors, context):
-    register = model.Package.get_license_register()
-    isByLicense = False
-    for k in data:
-        if len(k) > 0 and k[0] == 'license_id' and data[k] and not isinstance(data[k], Missing) and \
-                'Namensnennung' in register[data[k]].title:
-            isByLicense = True
-            break
-    hasAttribution = False
-    for k in data:
-        if data[k] == 'licenseAttributionByText':
-            if isinstance(data[(k[0], k[1], 'value')], Missing) or (k[0], k[1], 'value') not in data:
-                del data[(k[0], k[1], 'value')]
-                del data[(k[0], k[1], 'key')]
-                break
-            else:
-                value = data[(k[0], k[1], 'value')]
-                hasAttribution = value != ''
-                break
-    if not hasAttribution:
-        current_indexes = [k[1] for k in data.keys()
-                           if len(k) > 1 and k[0] == 'extras']
-
-        new_index = max(current_indexes) + 1 if current_indexes else 0
-        data[('extras', new_index, 'key')] = 'licenseAttributionByText'
-        data[('extras', new_index, 'value')] = ''
-
-    if isByLicense and not hasAttribution:
-        raise toolkit.Invalid(
-            'licenseAttributionByText: empty not allowed')
-
-    if not isByLicense and hasAttribution:
-        raise toolkit.Invalid(
-            'licenseAttributionByText: text not allowed for this license')
-
-
-def known_spatial_uri(key, data, errors, context):
-    value = _extract_value(data, 'spatial_uri')
-
-    if not value:
-        poly = None
-
-        # some harvesters might import a polygon directly...
-        # pdb.set_trace()
-        poly = _extract_value(data, 'spatial')
-
-        has_old_uri = False
-        pkg = context.get('package', None)
-        if pkg:
-            old_uri = pkg.extras.get('spatial_uri', None)
-            has_old_uri = old_uri != None and len(old_uri) > 0
-            if not poly:
-                poly = pkg.extras.get('spatial', None)
-        if not poly or has_old_uri:
-            raise toolkit.Invalid('spatial_uri: empty not allowed')
-        else:
-            if poly:
-                new_index = next_extra_index(data)
-                data[('extras', new_index+1, 'key')] = 'spatial'
-                data[('extras', new_index+1, 'value')] = poly
-            return
-
-    mapping_file = config.get('ckanext.odsh.spatial.mapping')
-    try:
-        mapping_file = urllib2.urlopen(mapping_file)
-    except Exception:
-        raise Exception("Could not load spatial mapping file!")
-
-    not_found = True
-    spatial_text = str()
-    spatial = str()
-    cr = csv.reader(mapping_file, delimiter="\t")
-    for row in cr:
-        if row[0].encode('UTF-8') == value:
-            not_found = False
-            spatial_text = row[1]
-            loaded = json.loads(row[2])
-            spatial = json.dumps(loaded['geometry'])
-            break
-    if not_found:
-        raise toolkit.Invalid(
-            'spatial_uri: uri unknown')
-
-    new_index = next_extra_index(data)
-
-    data[('extras', new_index, 'key')] = 'spatial_text'
-    data[('extras', new_index, 'value')] = spatial_text
-    data[('extras', new_index+1, 'key')] = 'spatial'
-    data[('extras', new_index+1, 'value')] = spatial
-
-
-def next_extra_index(data):
-    current_indexes = [k[1] for k in data.keys()
-                       if len(k) > 1 and k[0] == 'extras']
-
-    return max(current_indexes) + 1 if current_indexes else 0
-
-
-def tag_name_validator(value, context):
-    tagname_match = re.compile('[\w \-.\:\(\)\´\`]*$', re.UNICODE)
-    if not tagname_match.match(value):
-        raise toolkit.Invalid(_('Tag "%s" must be alphanumeric '
-                                'characters or symbols: -_.:()') % (value))
-    return value
-
-
-def tag_string_convert(key, data, errors, context):
-    '''Takes a list of tags that is a comma-separated string (in data[key])
-    and parses tag names. These are added to the data dict, enumerated. They
-    are also validated.'''
-    if isinstance(data[key], basestring):
-        tags = [tag.strip()
-                for tag in data[key].split(',')
-                if tag.strip()]
-    else:
-        tags = data[key]
-
-    current_index = max([int(k[1]) for k in data.keys()
-                         if len(k) == 3 and k[0] == 'tags'] + [-1])
-
-    for num, tag in zip(count(current_index+1), tags):
-        data[('tags', num, 'name')] = tag
-
-    for tag in tags:
-        toolkit.get_validator('tag_length_validator')(tag, context)
-        tag_name_validator(tag, context)
-
-
-def get_validators():
-    return {
-        'known_spatial_uri': known_spatial_uri,
-        'odsh_tag_name_validator': tag_name_validator,
-        'odsh_validate_extras': validate_extras,
-        'validate_licenseAttributionByText': validate_licenseAttributionByText
-    }