Skip to content
Snippets Groups Projects
Commit 45bdc1df authored by anonymous's avatar anonymous
Browse files

Dcat-datasets will be ignored when harvesting, if their modified-date is older...

DCAT datasets are ignored during harvesting if their modified date is older than that of the already existing dataset with the same guid
parent 10e360cc
No related branches found
No related tags found
No related merge requests found
......@@ -12,10 +12,12 @@ import ckan.lib.helpers as helpers
import helpers as odsh_helpers
import ckanext.odsh.logic.action as action
from ckanext.dcat.interfaces import IDCATRDFHarvester
from ckanext.dcatde.extras import Extras
from routes.mapper import SubMapper
from pylons import config
from dateutil.parser import parse
from ckan import model
import ckan.plugins as p
......@@ -207,9 +209,36 @@ class OdshDCATHarvestPlugin(plugins.SingletonPlugin):
plugins.implements(IDCATRDFHarvester, inherit=True)
def before_update(self, harvest_object, dataset_dict, temp_dict):
    '''
    IDCATRDFHarvester hook: drop an incoming dataset that is older than
    the one already stored.

    Looks up the active dataset carrying the same guid as
    ``harvest_object``. If the incoming ``dataset_dict`` has a
    ``modified`` extra whose value compares lower than the stored
    dataset's ``metadata_modified``, ``dataset_dict`` is emptied in place
    (presumably the harvester then skips it -- confirm against
    ckanext-dcat).

    If no active dataset with that guid exists there is nothing to
    compare against, and ``dataset_dict`` is left as it is.

    :param harvest_object: harvest object; only its ``guid`` is read here
    :param dataset_dict: dataset about to be written; mutated in place
    :param temp_dict: unused in this method
    '''
    # NOTE(review): the next two lines look like debugging leftovers
    # (every harvested title is overwritten with "Title") -- confirm
    # whether they are intentional before removing them.
    dataset_dict['title']="Title"
    print("BEFORE update")
    # self._before(dataset_dict, temp_dict, harvest_object)
    existing_package_dict = self._get_existing_dataset(harvest_object.guid)
    new_dataset_extras = Extras(dataset_dict['extras'])
    if existing_package_dict is None:
        # _get_existing_dataset returns None when no active dataset has
        # this guid; calling .get() on it would raise AttributeError.
        return
    # NOTE(review): both values are compared as-is; this is only a
    # chronological comparison if both are strings in the same sortable
    # date format -- TODO confirm.
    if new_dataset_extras.key('modified') and \
            new_dataset_extras.value('modified') < existing_package_dict.get('metadata_modified'):
        log.info("Modified date of new dataset is not newer than "
                 + "the already exisiting dataset, ignoring new one.")
        dataset_dict.clear()
def _get_existing_dataset(self, guid):
    '''
    Find the active dataset whose ``guid`` extra equals the given guid.

    Returns None when no such dataset exists; otherwise returns the
    result of the ``package_show`` action for the first match. If several
    datasets share the guid, an error is logged and the first one is
    still used.
    '''
    # Build the query step by step: active packages joined with their
    # extras, restricted to the extra named 'guid' holding this value.
    query = model.Session.query(model.Package.id)
    query = query.join(model.PackageExtra)
    query = query.filter(model.PackageExtra.key == 'guid')
    query = query.filter(model.PackageExtra.value == guid)
    query = query.filter(model.Package.state == 'active')
    matches = query.all()

    if not matches:
        return None

    if len(matches) > 1:
        message = 'Found more than one dataset with the same guid: {0}'
        log.error(message.format(guid))

    package_show = p.toolkit.get_action('package_show')
    # Each row holds only the selected Package.id column.
    first_id = matches[0][0]
    return package_show({}, {'id': first_id})
class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm):
......
......@@ -7,6 +7,7 @@ import ckanext.dcatde.dataset_utils as ds_utils
import logging
from ckan.plugins import toolkit
from ckan.common import config, json
from ckanext.dcat.interfaces import IDCATRDFHarvester
import sys
if sys.version_info[0] == 2:
......@@ -95,11 +96,11 @@ def resource_formats():
_RESOURCE_FORMATS_IMPORT = {}
_RESOURCE_FORMATS_EXPORT = {}
g = rdflib.Graph()
err_msg = ""
# at first try to get the actual file list online:
try:
format_european_url = config.get('ckan.odsh.resource_formats_url')
err_msg = "Could not get file formats from " + format_european_url
if not format_european_url:
log.warning("Could not find config setting: 'ckan.odsh.resource_formats_url', using fallback instead.")
format_european_url = "http://publications.europa.eu/resource/authority/file-type"
......@@ -116,6 +117,7 @@ def resource_formats():
urlresponse = urllib2.urlopen(urllib2.Request(format_european_url))
elif sys.version_info[0] == 3: # >=Python3.1
urlresponse = urllib.request.urlopen(urllib.request.Request(format_european_url))
err_msg = "Could not write to /usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf"
f = open('/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf', 'w')
f.write(urlresponse.read())
f.close()
......@@ -124,8 +126,9 @@ def resource_formats():
try:
g.parse('/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf')
assert len(set([s for s in g.subjects()])) > 120
log.warning("Could not get file formats from " + format_european_url + ", using fallback instead.")
except:
raise Exception("Could not get file formats from " + format_european_url)
raise Exception(err_msg)
file_types = [subj.decode() for subj in g.subjects()]
for elem in sorted(set(file_types)):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment