Commit 0c694fda authored by Jesper Zedlitz

Deleted superfluous files

parent d2d237a1
1 merge request: !17 Stage system should receive the master branch in the future
import logging
import rdflib
from ckan.common import config, json
from ckan.model.license import LicenseRegister
from ckanext.dcat.profiles import EuropeanDCATAPProfile, DCT, URIRefOrLiteral
from ckanext.dcatde.profiles import DCAT
log = logging.getLogger(__name__)
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")
class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
def _license(self, dataset_ref):
if self._licenceregister_cache is not None:
license_uri2id, license_title2id = self._licenceregister_cache
else:
license_uri2id = {}
license_title2id = {}
for license_id, license in LicenseRegister().items():
license_uri2id[license_id] = license_id
license_uri2id[license.url] = license_id
license_title2id[license.title] = license_id
self._licenceregister_cache = license_uri2id, license_title2id
for distribution in self._distributions(dataset_ref):
# If distribution has a license, attach it to the dataset
license = self._object(distribution, DCT.license)
if license:
# Try to find a matching license comparing URIs, then titles
license_id = license_uri2id.get(license.toPython())
if not license_id:
license_id = license_title2id.get(
self._object_value(license, DCT.title))
if license_id:
return license_id
return ''
def _distribution_format(self, distribution, normalize_ckan_format=True):
imt, label = super(ODSHEuropeanDCATAPProfile, self)._distribution_format(
distribution, normalize_ckan_format)
if label in resource_formats_import():
label = resource_formats_import()[label]
return imt, label
def graph_from_dataset(self, dataset_dict, dataset_ref):
super(ODSHEuropeanDCATAPProfile, self).graph_from_dataset(
dataset_dict, dataset_ref)
for s, p, o in self.g.triples((None, rdflib.RDF.type, DCAT.Distribution)):
for s2, p2, o2 in self.g.triples((s, DCT['format'], None)):
if o2.decode() in resource_formats_export():
self.g.set((s, DCT['format'], rdflib.URIRef(
resource_formats_export()[o2.decode()])))
for s, p, o in self.g.triples((None, DCT.language, None)):
if o.decode() in get_language():
self.g.set((s, p, rdflib.URIRef(get_language()[o.decode()])))
elif type(o) == rdflib.Literal and type(URIRefOrLiteral(o.decode())) == rdflib.URIRef:
self.g.set((s, p, rdflib.URIRef(o.decode())))
license = dataset_dict.get('license_id', None)
if license:
self.g.add((dataset_ref, DCT.license, rdflib.URIRef(license)))
for dist in self.g.objects(dataset_ref, DCAT.distribution):
self.g.add((dist, DCT.license, rdflib.URIRef(license)))
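# Illustrative sketch (not part of the original module): ckanext-dcat discovers
# custom profiles through the ``ckan.rdf.profiles`` setuptools entry-point group
# and activates them via the ``ckanext.dcat.rdf.profiles`` config option.  The
# package path and profile key below are assumptions for illustration only.
#
#   # setup.py
#   entry_points='''
#       [ckan.rdf.profiles]
#       odsh_dcat_ap=ckanext.odsh.profiles:ODSHEuropeanDCATAPProfile
#   '''
#
#   # production.ini
#   ckanext.dcat.rdf.profiles = odsh_dcat_ap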
_RESOURCE_FORMATS_IMPORT = None
_RESOURCE_FORMATS_EXPORT = None
def resource_formats():
global _RESOURCE_FORMATS_IMPORT
global _RESOURCE_FORMATS_EXPORT
_RESOURCE_FORMATS_IMPORT = {}
_RESOURCE_FORMATS_EXPORT = {}
g = rdflib.Graph()
# Load the resource formats from the local fallback file configured under 'ckan.odsh.resource_formats_fallback_filepath'
try:
fallback_filepath = config.get(
'ckan.odsh.resource_formats_fallback_filepath')
g.parse(fallback_filepath)
assert len(set([s for s in g.subjects()])) > 120
except Exception:
log.exception("failed to process resource_formats")
raise Exception('failed to load formats')
file_types = [subj.decode() for subj in g.subjects()]
for elem in sorted(set(file_types)):
if elem.split('/')[-1] != 'file-type':
_RESOURCE_FORMATS_EXPORT[elem.split('/')[-1]] = elem
_RESOURCE_FORMATS_IMPORT[elem] = elem.split('/')[-1]
def resource_formats_export():
global _RESOURCE_FORMATS_EXPORT
if not _RESOURCE_FORMATS_EXPORT:
resource_formats()
return _RESOURCE_FORMATS_EXPORT
def resource_formats_import():
global _RESOURCE_FORMATS_IMPORT
if not _RESOURCE_FORMATS_IMPORT:
resource_formats()
return _RESOURCE_FORMATS_IMPORT
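# Illustrative usage sketch (not part of the original module), assuming the
# fallback file contains the EU file-type authority vocabulary: the export
# table maps plain labels to vocabulary URIs and the import table maps those
# URIs back to labels.
def _demo_resource_formats():
    # e.g. 'CSV' -> 'http://publications.europa.eu/resource/authority/file-type/CSV'
    uri = resource_formats_export().get('CSV')
    # ... and the same URI back to 'CSV'
    label = resource_formats_import().get(uri)
    return uri, label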
_LANGUAGES = None
def get_language():
''' When datasets are exported in RDF format, their language tag
should be given as
"<dct:language rdf:resource="http://publications.europa.eu/.../XXX"/>",
where XXX is the language code according to the ISO 639-3 standard.
However, some imported datasets give their language as
"<dct:language>de</dct:language>", which is interpreted here as an
ISO 639-1 value. As we do not display the language setting in the
web frontend, this function only ensures the correct export format,
using 'languages.json' as a mapping table.
'''
global _LANGUAGES
if not _LANGUAGES:
_LANGUAGES = {}
languages_file_path = config.get('ckanext.odsh.language.mapping')
if not languages_file_path:
log.warning(
"Could not find config setting: 'ckanext.odsh.language.mapping', using fallback instead.")
languages_file_path = '/usr/lib/ckan/default/src/ckanext-odsh/languages.json'
with open(languages_file_path) as languages_file:
try:
language_mapping_table = json.loads(languages_file.read())
except ValueError, e:
# includes simplejson.decoder.JSONDecodeError
raise ValueError('Invalid JSON syntax in %s: %s' %
(languages_file_path, e))
for language_line in language_mapping_table:
_LANGUAGES[language_line[0]] = language_line[1]
return _LANGUAGES
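# Illustrative sketch (not part of the original module): the mapping file is
# read as a list of [tag, URI] pairs; the exact entries below are assumptions.
#
#   languages.json:
#     [
#       ["de", "http://publications.europa.eu/resource/authority/language/DEU"],
#       ["en", "http://publications.europa.eu/resource/authority/language/ENG"]
#     ]
#
# With such a mapping, get_language().get('de') yields the DEU authority URI
# that ends up in the exported <dct:language rdf:resource="..."/> triple.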
V;2019-04-25 12:18:36;ckan274;"ULS";"Exception";"traceback";"Text";"
Traceback (most recent call last):
File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
raise BaseException('boom')
BaseException: boom
";
V;2019-04-25 12:18:37;ckan274;"ULS";"Exception";"traceback";"Text";"
Traceback (most recent call last):
File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
raise BaseException('boom')
BaseException: boom
";
V;2019-04-25 12:18:38;ckan274;"ULS";"Exception";"traceback";"Text";"
Traceback (most recent call last):
File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
raise BaseException('boom')
BaseException: boom
";
V;2019-04-25 12:18:40;ckan274;"ULS";"Exception";"traceback";"Text";"
Traceback (most recent call last):
File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
raise BaseException('boom')
BaseException: boom
";
V;2019-04-25 12:18:41;ckan274;"ULS";"Exception";"traceback";"Text";"
Traceback (most recent call last):
File \\"/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/plugin.py\\", line 358, in before_search
raise BaseException('boom')
BaseException: boom
";
# This Python file uses the following encoding: utf-8
import logging
import csv
import re
import urllib2
import json
from itertools import count
from dateutil.parser import parse
import ckan.plugins.toolkit as toolkit
import ckan.model as model
from ckan.lib.navl.dictization_functions import Missing
from pylons import config
import pdb
_ = toolkit._
log = logging.getLogger(__name__)
def _extract_value(data, field):
key = None
for k in data.keys():
if data[k] == field:
key = k
break
if key is None:
return None
return data[(key[0], key[1], 'value')]
def validate_extra_groups(data, requireAtLeastOne, errors):
value = _extract_value(data, 'groups')
if value != None:
# 'value != None' means the extra key 'groups' was found,
# so the dataset came from manual editing via the web-frontend.
if not value:
if requireAtLeastOne:
errors['groups'] = 'at least one group needed'
data[('groups', 0, 'id')] = ''
return
groups = [g.strip() for g in value.split(',') if g.strip()]
for k in data.keys():
if len(k) == 3 and k[0] == 'groups':
data[k] = ''
# del data[k]
if len(groups) == 0:
if requireAtLeastOne:
errors['groups'] = 'at least one group needed'
return
for num, group in zip(range(len(groups)), groups):
data[('groups', num, 'id')] = group
else: # no extra-field 'groups'
# dataset might come from a harvest process
if not data.get(('groups', 0, 'id'), False) and \
not data.get(('groups', 0, 'name'), False):
errors['groups'] = 'at least one group needed'
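# Illustrative sketch (not part of the original module): CKAN's dictization
# layer hands validators a flattened dict with tuple keys, so a dataset edited
# in the web frontend with an extra field 'groups' looks roughly like the
# example below.  The group names are assumptions for illustration.
def _demo_validate_extra_groups():
    data = {
        ('extras', 0, 'key'): 'groups',
        ('extras', 0, 'value'): 'gesundheit, soziales',
    }
    errors = {}
    validate_extra_groups(data, True, errors)
    # afterwards: data[('groups', 0, 'id')] == 'gesundheit'
    #             data[('groups', 1, 'id')] == 'soziales'
    return data, errors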
def validate_extras(key, data, errors, context):
extra_errors = {}
isStaNord = ('id',) in data and data[('id',)][:7] == 'StaNord'
validate_extra_groups(data, True, extra_errors)
validate_extra_date_new(key, 'issued', data, isStaNord, extra_errors)
validate_extra_date_new(key, 'temporal_start',
data, isStaNord, extra_errors)
validate_extra_date_new(key, 'temporal_end', data, True, extra_errors)
if len(extra_errors.values()):
raise toolkit.Invalid(extra_errors)
def _set_value(data, field, value):
key = None
for k in data.keys():
if data[k] == field:
key = k
break
if key is None:
return None
data[(key[0], key[1], 'value')] = value
def validate_extra_date_new(key, field, data, optional, errors):
value = _extract_value(data, field)
if not value:
if not optional:
errors[field] = 'empty'
return
else:
if re.match(r'\d\d\d\d-\d\d-\d\d', value):
try:
dt = parse(value)
_set_value(data, field, dt.isoformat())
return
except ValueError:
pass
errors[field] = 'not a valid date'
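# Illustrative sketch (not part of the original module): a value in
# YYYY-MM-DD form is normalised to its ISO representation, anything else is
# reported as an error.  Field name and value are assumptions for illustration.
def _demo_validate_extra_date():
    data = {
        ('extras', 0, 'key'): 'issued',
        ('extras', 0, 'value'): '2019-04-25',
    }
    errors = {}
    validate_extra_date_new(None, 'issued', data, False, errors)
    # afterwards: data[('extras', 0, 'value')] == '2019-04-25T00:00:00'
    return data, errors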
def validate_licenseAttributionByText(key, data, errors, context):
register = model.Package.get_license_register()
isByLicense = False
for k in data:
if len(k) > 0 and k[0] == 'license_id' and data[k] and not isinstance(data[k], Missing) and \
'Namensnennung' in register[data[k]].title:
isByLicense = True
break
hasAttribution = False
for k in data:
if data[k] == 'licenseAttributionByText':
if (k[0], k[1], 'value') not in data or isinstance(data[(k[0], k[1], 'value')], Missing):
data.pop((k[0], k[1], 'value'), None)
del data[(k[0], k[1], 'key')]
break
else:
value = data[(k[0], k[1], 'value')]
hasAttribution = value != ''
break
if not hasAttribution:
current_indexes = [k[1] for k in data.keys()
if len(k) > 1 and k[0] == 'extras']
new_index = max(current_indexes) + 1 if current_indexes else 0
data[('extras', new_index, 'key')] = 'licenseAttributionByText'
data[('extras', new_index, 'value')] = ''
if isByLicense and not hasAttribution:
raise toolkit.Invalid(
'licenseAttributionByText: empty not allowed')
if not isByLicense and hasAttribution:
raise toolkit.Invalid(
'licenseAttributionByText: text not allowed for this license')
def known_spatial_uri(key, data, errors, context):
value = _extract_value(data, 'spatial_uri')
if not value:
poly = None
# some harvesters might import a polygon directly...
# pdb.set_trace()
poly = _extract_value(data, 'spatial')
has_old_uri = False
pkg = context.get('package', None)
if pkg:
old_uri = pkg.extras.get('spatial_uri', None)
has_old_uri = old_uri != None and len(old_uri) > 0
if not poly:
poly = pkg.extras.get('spatial', None)
if not poly or has_old_uri:
raise toolkit.Invalid('spatial_uri: empty not allowed')
else:
if poly:
new_index = next_extra_index(data)
data[('extras', new_index, 'key')] = 'spatial'
data[('extras', new_index, 'value')] = poly
return
mapping_file = config.get('ckanext.odsh.spatial.mapping')
try:
mapping_file = urllib2.urlopen(mapping_file)
except Exception:
raise Exception("Could not load spatial mapping file!")
not_found = True
spatial_text = str()
spatial = str()
cr = csv.reader(mapping_file, delimiter="\t")
for row in cr:
if row[0].encode('UTF-8') == value:
not_found = False
spatial_text = row[1]
loaded = json.loads(row[2])
spatial = json.dumps(loaded['geometry'])
break
if not_found:
raise toolkit.Invalid(
'spatial_uri: uri unknown')
new_index = next_extra_index(data)
data[('extras', new_index, 'key')] = 'spatial_text'
data[('extras', new_index, 'value')] = spatial_text
data[('extras', new_index+1, 'key')] = 'spatial'
data[('extras', new_index+1, 'value')] = spatial
def next_extra_index(data):
current_indexes = [k[1] for k in data.keys()
if len(k) > 1 and k[0] == 'extras']
return max(current_indexes) + 1 if current_indexes else 0
def tag_name_validator(value, context):
tagname_match = re.compile('[\w \-.\:\(\)\´\`]*$', re.UNICODE)
if not tagname_match.match(value):
raise toolkit.Invalid(_('Tag "%s" must be alphanumeric '
'characters or symbols: -_.:()') % (value))
return value
def tag_string_convert(key, data, errors, context):
'''Takes a list of tags that is a comma-separated string (in data[key])
and parses tag names. These are added to the data dict, enumerated. They
are also validated.'''
if isinstance(data[key], basestring):
tags = [tag.strip()
for tag in data[key].split(',')
if tag.strip()]
else:
tags = data[key]
current_index = max([int(k[1]) for k in data.keys()
if len(k) == 3 and k[0] == 'tags'] + [-1])
for num, tag in zip(count(current_index+1), tags):
data[('tags', num, 'name')] = tag
for tag in tags:
toolkit.get_validator('tag_length_validator')(tag, context)
tag_name_validator(tag, context)
def get_validators():
return {
'known_spatial_uri': known_spatial_uri,
'odsh_tag_name_validator': tag_name_validator,
'odsh_validate_extras': validate_extras,
'validate_licenseAttributionByText': validate_licenseAttributionByText
}
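# Illustrative sketch (not part of the original module): how these validators
# would typically be exposed to CKAN through the IValidators plugin interface;
# the plugin class name and import path are assumptions.
#
#   import ckan.plugins as plugins
#   from ckanext.odsh import validation
#
#   class OdshPlugin(plugins.SingletonPlugin):
#       plugins.implements(plugins.IValidators)
#
#       def get_validators(self):
#           return validation.get_validators()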