Skip to content
Snippets Groups Projects
Commit a3a243cc authored by anonymous
Browse files

adjust harvesters

parent 2fe5c77e
Branches
Tags
No related merge requests found
...@@ -39,23 +39,26 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -39,23 +39,26 @@ class KielHarvester(ODSHBaseHarvester):
try: try:
used_identifiers = [] used_identifiers = []
ids = [] ids = []
package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)\ package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)
.filter(HarvestObject.current == True)\ .filter(HarvestObject.current == True)
.filter(HarvestObject.harvest_source_id == harvest_job.source.id).all())) .filter(HarvestObject.harvest_source_id == harvest_job.source.id).all()))
log.info("Package IDs in DB: %s" % str(package_ids_in_db)) log.info("Package IDs in DB: %s" % str(package_ids_in_db))
for dataset in datasets: for dataset in datasets:
guid = str(uuid.uuid3(uuid.NAMESPACE_URL, dataset.get("url").encode('ascii', 'ignore'))) guid = str(uuid.uuid3(uuid.NAMESPACE_URL,
dataset.get("url").encode('ascii', 'ignore')))
if guid not in package_ids_in_db: if guid not in package_ids_in_db:
obj = HarvestObject(job=harvest_job, guid=guid) obj = HarvestObject(job=harvest_job, guid=guid)
obj.content = json.dumps(dataset) obj.content = json.dumps(dataset)
obj.save() obj.save()
log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (str(obj.id), str(obj.guid))) log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (
str(obj.id), str(obj.guid)))
used_identifiers.append(guid) used_identifiers.append(guid)
ids.append(obj.id) ids.append(obj.id)
except Exception as e: except Exception as e:
self._save_gather_error( self._save_gather_error(
'Statistik-Nord-Harvester: Error gathering the identifiers from the source server [%s]' % str(e), 'Kiel-Harvester: Error gathering the identifiers from the source server [%s]' % str(
e),
harvest_job) harvest_job)
log.error(e) log.error(e)
return None return None
...@@ -68,7 +71,8 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -68,7 +71,8 @@ class KielHarvester(ODSHBaseHarvester):
return ids return ids
else: else:
log.error("No records received") log.error("No records received")
self._save_gather_error("Couldn't find any metadata files", harvest_job) self._save_gather_error(
"Couldn't find any metadata files", harvest_job)
return None return None
@staticmethod @staticmethod
...@@ -79,6 +83,7 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -79,6 +83,7 @@ class KielHarvester(ODSHBaseHarvester):
return False return False
def import_stage(self, harvest_object): def import_stage(self, harvest_object):
log.debug('IMPORT')
context = { context = {
'model': model, 'model': model,
'session': model.Session, 'session': model.Session,
...@@ -89,12 +94,14 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -89,12 +94,14 @@ class KielHarvester(ODSHBaseHarvester):
return False return False
if harvest_object.content is None: if harvest_object.content is None:
self._save_object_error('Empty content for object %s' % harvest_object.id, harvest_object, 'Import') self._save_object_error(
'Empty content for object %s' % harvest_object.id, harvest_object, 'Import')
return False return False
else: else:
package_dict = json.loads(harvest_object.content) package_dict = json.loads(harvest_object.content)
source_dataset = get_action('package_show')(context.copy(), {'id': harvest_object.source.id}) source_dataset = get_action('package_show')(
context.copy(), {'id': harvest_object.source.id})
package_dict['owner_org'] = source_dataset.get('owner_org') package_dict['owner_org'] = source_dataset.get('owner_org')
if package_dict['type'] == 'datensatz': if package_dict['type'] == 'datensatz':
...@@ -115,25 +122,35 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -115,25 +122,35 @@ class KielHarvester(ODSHBaseHarvester):
package_dict['groups'] = mapped_groups package_dict['groups'] = mapped_groups
extras = package_dict['extras'] extras = package_dict['extras']
package_dict['extras'] = list() new_extras = list()
for extra in extras: for extra in extras:
if extra['key'] in ['temporal_start', 'temporal_end', 'issued']: if extra['key'] in ['temporal_start', 'temporal_end', 'issued']:
package_dict[extra['key']] = extra['value'] new_extras.append(extra)
new_extras.append(
{'spatial_uri': 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002'})
package_dict['extras'] = new_extras
package_dict['spatial_uri'] = 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002' log.debug(package_dict['extras'])
license_id = self._get_license_id(package_dict['license_id']) license_id = self._get_license_id(package_dict['license_id'])
if license_id: if license_id:
package_dict['license_id'] = license_id package_dict['license_id'] = license_id
else: else:
log.error('invalid license_id: %s' % package_dict['license_id']) log.error('invalid license_id: %s' %
self._save_object_error('Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import') package_dict['license_id'])
self._save_object_error(
'Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import')
return False return False
try: try:
context = {'user': self._get_user_name(), 'return_id_only': True, 'ignore_auth': True} context = {'user': self._get_user_name(
package_plugin = lib_plugins.lookup_package_plugin(package_dict.get('type', None)) ), 'return_id_only': True, 'ignore_auth': True}
package_plugin = lib_plugins.lookup_package_plugin(
package_dict.get('type', None))
package_schema = package_plugin.create_package_schema() package_schema = package_plugin.create_package_schema()
context['schema'] = package_schema context['schema'] = package_schema
log.debug(package_schema)
self._handle_current_harvest_object(harvest_object, harvest_object.guid) self._handle_current_harvest_object(harvest_object, harvest_object.guid)
result = toolkit.get_action('package_create')(context, package_dict) result = toolkit.get_action('package_create')(context, package_dict)
return result return result
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment