Reviewed revision of the ODSH harvest patch (plugin.py, profiles.py).
Line breaks, indentation and blank lines were reconstructed from a
whitespace-collapsed copy; apply with whitespace-tolerant matching
(for example `patch -l -p1`). The index lines are those of the original patch.

diff --git a/ckanext/odsh/plugin.py b/ckanext/odsh/plugin.py
index a07373208c9d73389433055090a7daa3614c6fdc..e80acd41fcb6975d05825a9df219eb35746d15be 100644
--- a/ckanext/odsh/plugin.py
+++ b/ckanext/odsh/plugin.py
@@ -12,10 +12,12 @@ import ckan.lib.helpers as helpers
 import helpers as odsh_helpers
 import ckanext.odsh.logic.action as action
 from ckanext.dcat.interfaces import IDCATRDFHarvester
+from ckanext.dcatde.extras import Extras
 
 from routes.mapper import SubMapper
 from pylons import config
 from dateutil.parser import parse
+from ckan import model
 
 import ckan.plugins as p
 
@@ -207,9 +209,52 @@ class OdshDCATHarvestPlugin(plugins.SingletonPlugin):
     plugins.implements(IDCATRDFHarvester, inherit=True)
 
     def before_update(self, harvest_object, dataset_dict, temp_dict):
-        dataset_dict['title']="Title"
-        print("BEFORE update")
-        # self._before(dataset_dict, temp_dict, harvest_object)
+        '''
+        Skip the update if the harvested dataset is older than the stored one.
+
+        Clears dataset_dict in place when its 'modified' extra compares lower
+        than the 'metadata_modified' value of the active dataset with the
+        same guid. Does nothing when no such dataset exists yet.
+
+        NOTE(review): assumes the harvester skips an emptied dataset_dict -
+        confirm against the ckanext-dcat version in use.
+        '''
+        existing_package_dict = self._get_existing_dataset(harvest_object.guid)
+        if not existing_package_dict:
+            # _get_existing_dataset returns None for an unknown guid; there is
+            # nothing to compare against, so leave the update untouched.
+            return
+
+        new_dataset_extras = Extras(dataset_dict['extras'])
+        if new_dataset_extras.key('modified') and \
+                new_dataset_extras.value('modified') < existing_package_dict.get('metadata_modified'):
+            log.info("Modified date of new dataset is not newer than "
+                     + "the already exisiting dataset, ignoring new one.")
+            dataset_dict.clear()
+
+    def _get_existing_dataset(self, guid):
+        '''
+        Checks if a dataset with a certain guid extra already exists
+
+        Returns a dict as the ones returned by package_show, or None if no
+        active dataset has that guid. If several match, an error is logged
+        and the first one found is used.
+        '''
+
+        datasets = model.Session.query(model.Package.id) \
+            .join(model.PackageExtra) \
+            .filter(model.PackageExtra.key == 'guid') \
+            .filter(model.PackageExtra.value == guid) \
+            .filter(model.Package.state == 'active') \
+            .all()
+
+        if not datasets:
+            return None
+        elif len(datasets) > 1:
+            log.error('Found more than one dataset with the same guid: {0}'
+                      .format(guid))
+
+        return p.toolkit.get_action('package_show')({}, {'id': datasets[0][0]})
 
 
 class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm):
diff --git a/ckanext/odsh/profiles.py b/ckanext/odsh/profiles.py
index 02400cd52ba0b7a6447c48bcb747c15bd234dd06..416f1f76c1cec7c4b980e9b8296fdafaa92748d9 100644
--- a/ckanext/odsh/profiles.py
+++ b/ckanext/odsh/profiles.py
@@ -7,6 +7,7 @@ import ckanext.dcatde.dataset_utils as ds_utils
 import logging
 from ckan.plugins import toolkit
 from ckan.common import config, json
+from ckanext.dcat.interfaces import IDCATRDFHarvester
 
 import sys
 if sys.version_info[0] == 2:
@@ -62,7 +63,7 @@ class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
                 self.g.set((s, p, rdflib.URIRef(get_language()[o.decode()])))
             elif type(o) == rdflib.Literal and type(URIRefOrLiteral(o.decode())) == rdflib.URIRef:
                 self.g.set((s, p, rdflib.URIRef(o.decode()) ))
-    
+
 class ODSHDCATdeProfile(DCATdeProfile):
 
     def parse_dataset(self, dataset_dict, dataset_ref):
@@ -95,11 +96,15 @@ def resource_formats():
     _RESOURCE_FORMATS_IMPORT = {}
     _RESOURCE_FORMATS_EXPORT = {}
     g = rdflib.Graph()
-    
+    err_msg = ""
     # at first try to get the actual file list online:
     try:
         format_european_url = config.get('ckan.odsh.resource_formats_url')
-        
+        # The configured URL may be missing (None); name the default that is
+        # substituted below so that building the message cannot raise.
+        err_msg = "Could not get file formats from " + (
+            format_european_url
+            or "http://publications.europa.eu/resource/authority/file-type")
         if not format_european_url:
             log.warning("Could not find config setting: 'ckan.odsh.resource_formats_url', using fallback instead.")
             format_european_url = "http://publications.europa.eu/resource/authority/file-type"
@@ -116,6 +121,7 @@ def resource_formats():
             urlresponse = urllib2.urlopen(urllib2.Request(format_european_url))
         elif sys.version_info[0] == 3: # >=Python3.1
             urlresponse = urllib.request.urlopen(urllib.request.Request(format_european_url))
+        err_msg = "Could not write to /usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf"
         f = open('/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf', 'w')
         f.write(urlresponse.read())
         f.close()
@@ -124,8 +130,9 @@ def resource_formats():
         try:
             g.parse('/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf')
             assert len(set([s for s in g.subjects()])) > 120
+            log.warning("Could not get file formats from " + format_european_url + ", using fallback instead.")
         except:
-            raise Exception("Could not get file formats from " + format_european_url)
+            raise Exception(err_msg)
 
     file_types = [subj.decode() for subj in g.subjects()]
     for elem in sorted(set(file_types)):