Skip to content
Snippets Groups Projects
Commit 0b47fb46 authored by anonymous
Browse files

modifications for new kiel harvesting

parent bdeb4f59
No related branches found
No related tags found
No related merge requests found
import ckan.model as model
from ckanext.harvest.model import HarvestObject
from ckanext.harvest.harvesters.base import HarvesterBase from ckanext.harvest.harvesters.base import HarvesterBase
...@@ -6,3 +8,23 @@ class ODSHBaseHarvester(HarvesterBase): ...@@ -6,3 +8,23 @@ class ODSHBaseHarvester(HarvesterBase):
license_mapping = {'dl-de-zero-2.0': 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0', license_mapping = {'dl-de-zero-2.0': 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0',
'dl-de-by-2.0': "http://dcat-ap.de/def/licenses/dl-by-de/2.0"} 'dl-de-by-2.0': "http://dcat-ap.de/def/licenses/dl-by-de/2.0"}
return license_mapping.get(license_id, None) return license_mapping.get(license_id, None)
def _handle_current_harvest_object(self, harvest_object, package_id):
    """Make ``harvest_object`` the current harvest object for its GUID.

    Any object with the same ``guid`` that is still flagged as current is
    unflagged first. ``harvest_object`` is then flagged as current, linked
    to ``package_id`` and added, and the session is flushed.

    :param harvest_object: the HarvestObject being imported
    :param package_id: value stored in ``harvest_object.package_id``
    """
    session = model.Session

    # Look up the object (if any) that is currently flagged for this GUID.
    # ``== True`` is intentional: it builds a SQL expression on the column.
    current_for_guid = (
        session.query(HarvestObject)
        .filter(HarvestObject.guid == harvest_object.guid)
        .filter(HarvestObject.current == True)
    )
    stale_object = current_for_guid.first()

    # Retire the previously current object before promoting the new one.
    if stale_object:
        stale_object.current = False
        stale_object.add()

    # Promote the object that is being imported right now.
    harvest_object.current = True
    harvest_object.package_id = package_id
    harvest_object.add()

    # Defer the package_id foreign-key check before flushing.
    # NOTE(review): presumably because the referenced package row is only
    # created after this call -- confirm against the caller.
    session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
    session.flush()
from ckan import model from ckan import model
from ckan.logic import get_action from ckan.logic import get_action
from ckan.plugins import toolkit from ckan.plugins import toolkit
import ckan.lib.plugins as lib_plugins
from ckanext.harvest.model import HarvestObject from ckanext.harvest.model import HarvestObject
from ckanext.odsh.harvesters.base import ODSHBaseHarvester from ckanext.odsh.harvesters.base import ODSHBaseHarvester
...@@ -40,8 +41,13 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -40,8 +41,13 @@ class KielHarvester(ODSHBaseHarvester):
try: try:
used_identifiers = [] used_identifiers = []
ids = [] ids = []
package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)\
.filter(HarvestObject.current == True)\
.filter(HarvestObject.harvest_source_id == harvest_job.source.id).all()))
log.info("Package IDs in DB: %s" % str(package_ids_in_db))
for dataset in datasets: for dataset in datasets:
guid = str(uuid.uuid3(uuid.NAMESPACE_URL, dataset.get("url").encode('ascii', 'ignore'))) guid = str(uuid.uuid3(uuid.NAMESPACE_URL, dataset.get("url").encode('ascii', 'ignore')))
if guid not in package_ids_in_db:
obj = HarvestObject(job=harvest_job, guid=guid) obj = HarvestObject(job=harvest_job, guid=guid)
obj.content = json.dumps(dataset) obj.content = json.dumps(dataset)
obj.save() obj.save()
...@@ -113,15 +119,15 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -113,15 +119,15 @@ class KielHarvester(ODSHBaseHarvester):
package_dict['groups'] = mapped_groups package_dict['groups'] = mapped_groups
published = str() published = str()
extras = package_dict['extras']
package_dict['extras'] = list() package_dict['extras'] = list()
for extra in package_dict['extras']: for extra in extras:
if extra['key'] == 'dates': if extra['key'] == 'dates':
published = extra['value']['date'] package_dict['issued'] = extra['value'][0]['date']
package_dict['extras'].append({'key': 'issued', 'value': published})
elif extra['key'] in ['temporal_start', 'temporal_end']: elif extra['key'] in ['temporal_start', 'temporal_end']:
package_dict['extras'].append(extra) package_dict[extra['key']] = extra['value']
package_dict['extras'].append({'key': 'spatial_uri', 'value': 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002'}) package_dict['spatial_uri'] = 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002'
#license_id = self._get_license_id(package_dict['license_id']) #license_id = self._get_license_id(package_dict['license_id'])
license_id = 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0' license_id = 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0'
...@@ -132,7 +138,13 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -132,7 +138,13 @@ class KielHarvester(ODSHBaseHarvester):
self._save_object_error('Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import') self._save_object_error('Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import')
return False return False
try: try:
result = self._create_or_update_package(package_dict, harvest_object, package_dict_form='package_show') context = {'user': self._get_user_name(), 'return_id_only': True, 'ignore_auth': True}
package_plugin = lib_plugins.lookup_package_plugin(package_dict.get('type', None))
package_schema = package_plugin.create_package_schema()
context['schema'] = package_schema
log.info("Package Dict: %s" % str(package_dict))
self._handle_current_harvest_object(harvest_object, harvest_object.guid)
result = toolkit.get_action('package_create')(context, package_dict)
return result return result
except toolkit.ValidationError as e: except toolkit.ValidationError as e:
self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import') self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import')
......
...@@ -203,7 +203,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm ...@@ -203,7 +203,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm
return ['title','notes'] return ['title','notes']
def _extraFields(self): def _extraFields(self):
return ['publish_date', 'temporal_start', 'temporal_end', 'spatial_uri'] return ['issued', 'temporal_start', 'temporal_end', 'spatial_uri']
def _extraFieldsOptional(self): def _extraFieldsOptional(self):
return ['access_constraints'] return ['access_constraints']
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment