diff --git a/ckanext/odsh/harvesters/base.py b/ckanext/odsh/harvesters/base.py index 288bb296e567dd41090eb2a1f9a5e94e8fc08276..692d0e077dc6b855961d021653d57482298111fa 100644 --- a/ckanext/odsh/harvesters/base.py +++ b/ckanext/odsh/harvesters/base.py @@ -1,3 +1,5 @@ +import ckan.model as model +from ckanext.harvest.model import HarvestObject from ckanext.harvest.harvesters.base import HarvesterBase @@ -6,3 +8,23 @@ class ODSHBaseHarvester(HarvesterBase): license_mapping = {'dl-de-zero-2.0': 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0', 'dl-de-by-2.0': "http://dcat-ap.de/def/licenses/dl-by-de/2.0"} return license_mapping.get(license_id, None) + + def _handle_current_harvest_object(self, harvest_object, package_id): + # Get the last harvested object (if any) + previous_object = model.Session.query(HarvestObject) \ + .filter(HarvestObject.guid==harvest_object.guid) \ + .filter(HarvestObject.current==True) \ + .first() + + # Flag previous object as not current anymore + if previous_object: + previous_object.current = False + previous_object.add() + + # Flag this object as the current one + harvest_object.current = True + harvest_object.package_id = package_id + harvest_object.add() + + model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() diff --git a/ckanext/odsh/harvesters/kielharvester.py b/ckanext/odsh/harvesters/kielharvester.py index 3e85ff33fa576468657cd3928ea7c86142756b83..f84d69f5008e1796fffca11672ca54b700375b72 100755 --- a/ckanext/odsh/harvesters/kielharvester.py +++ b/ckanext/odsh/harvesters/kielharvester.py @@ -1,6 +1,7 @@ from ckan import model from ckan.logic import get_action from ckan.plugins import toolkit +import ckan.lib.plugins as lib_plugins from ckanext.harvest.model import HarvestObject from ckanext.odsh.harvesters.base import ODSHBaseHarvester @@ -40,14 +41,19 @@ class KielHarvester(ODSHBaseHarvester): try: used_identifiers = [] ids = [] + package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)\ + .filter(HarvestObject.current == True)\ + .filter(HarvestObject.harvest_source_id == harvest_job.source.id).all())) + log.info("Package IDs in DB: %s" % str(package_ids_in_db)) for dataset in datasets: guid = str(uuid.uuid3(uuid.NAMESPACE_URL, dataset.get("url").encode('ascii', 'ignore'))) - obj = HarvestObject(job=harvest_job, guid=guid) - obj.content = json.dumps(dataset) - obj.save() - log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (str(obj.id), str(obj.guid))) - used_identifiers.append(guid) - ids.append(obj.id) + if guid not in package_ids_in_db: + obj = HarvestObject(job=harvest_job, guid=guid) + obj.content = json.dumps(dataset) + obj.save() + log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (str(obj.id), str(obj.guid))) + used_identifiers.append(guid) + ids.append(obj.id) except Exception as e: self._save_gather_error( @@ -113,15 +119,15 @@ class KielHarvester(ODSHBaseHarvester): package_dict['groups'] = mapped_groups published = str() + extras = package_dict['extras'] package_dict['extras'] = list() - for extra in package_dict['extras']: + for extra in extras: if extra['key'] == 'dates': - published = extra['value']['date'] - package_dict['extras'].append({'key': 'issued', 'value': published}) + package_dict['issued'] = extra['value'][0]['date'] elif extra['key'] in ['temporal_start', 'temporal_end']: - package_dict['extras'].append(extra) + package_dict[extra['key']] = extra['value'] - package_dict['extras'].append({'key': 'spatial_uri', 'value': 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002'}) + package_dict['spatial_uri'] = 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002' #license_id = self._get_license_id(package_dict['license_id']) license_id = 'http://dcat-ap.de/def/licenses/dl-zero-de/2.0' @@ -132,7 +138,13 @@ class KielHarvester(ODSHBaseHarvester): self._save_object_error('Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import') return False try: - result = self._create_or_update_package(package_dict, harvest_object, package_dict_form='package_show') + context = {'user': self._get_user_name(), 'return_id_only': True, 'ignore_auth': True} + package_plugin = lib_plugins.lookup_package_plugin(package_dict.get('type', None)) + package_schema = package_plugin.create_package_schema() + context['schema'] = package_schema + log.info("Package Dict: %s" % str(package_dict)) + self._handle_current_harvest_object(harvest_object, harvest_object.guid) + result = toolkit.get_action('package_create')(context, package_dict) return result except toolkit.ValidationError as e: self._save_object_error('Validation Error: %s' % str(e.error_summary), harvest_object, 'Import') diff --git a/ckanext/odsh/plugin.py b/ckanext/odsh/plugin.py index bdcdaeea858ea0e93bde9da4421d92eaca610ca6..c2b54d6ba46a2abfc6cdcffb4bedfa57a4d3734a 100644 --- a/ckanext/odsh/plugin.py +++ b/ckanext/odsh/plugin.py @@ -121,7 +121,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm toolkit.add_template_directory(config_, 'templates') toolkit.add_public_directory(config_, 'public') toolkit.add_resource('fanstatic', 'odsh') - + def get_helpers(self): # Template helper function names should begin with the name of the # extension they belong to, to avoid clashing with functions from @@ -176,7 +176,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm m.connect('user_datasets', '/user/{id:.*}', action='read', ckan_icon='sitemap') m.connect('user_index', '/user', action='index') - + return map def dataset_facets(self, facets_dict, package_type): @@ -203,7 +203,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm return ['title','notes'] def _extraFields(self): - return ['publish_date', 'temporal_start', 'temporal_end', 'spatial_uri'] + return ['issued', 'temporal_start', 'temporal_end', 'spatial_uri'] def _extraFieldsOptional(self): return ['access_constraints'] @@ -256,7 +256,7 @@ class OdshPlugin(plugins.SingletonPlugin, DefaultTranslation, DefaultDatasetForm # registers itself as the default (above). return [] - def get_validators(self): + def get_validators(self): return { 'odsh_convert_groups_string': odsh_convert_groups_string, 'known_spatial_uri': known_spatial_uri} - +