Skip to content
Snippets Groups Projects
Commit 4db5387c authored by chbaeh
Browse files

adjust harvesters

parent a307ef53
No related branches found
No related tags found
No related merge requests found
...@@ -39,23 +39,26 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -39,23 +39,26 @@ class KielHarvester(ODSHBaseHarvester):
try: try:
used_identifiers = [] used_identifiers = []
ids = [] ids = []
package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)\ package_ids_in_db = list(map(lambda x: x[0], model.Session.query(HarvestObject.guid)
.filter(HarvestObject.current == True)\ .filter(HarvestObject.current == True)
.filter(HarvestObject.harvest_source_id == harvest_job.source.id).all())) .filter(HarvestObject.harvest_source_id == harvest_job.source.id).all()))
log.info("Package IDs in DB: %s" % str(package_ids_in_db)) log.info("Package IDs in DB: %s" % str(package_ids_in_db))
for dataset in datasets: for dataset in datasets:
guid = str(uuid.uuid3(uuid.NAMESPACE_URL, dataset.get("url").encode('ascii', 'ignore'))) guid = str(uuid.uuid3(uuid.NAMESPACE_URL,
dataset.get("url").encode('ascii', 'ignore')))
if guid not in package_ids_in_db: if guid not in package_ids_in_db:
obj = HarvestObject(job=harvest_job, guid=guid) obj = HarvestObject(job=harvest_job, guid=guid)
obj.content = json.dumps(dataset) obj.content = json.dumps(dataset)
obj.save() obj.save()
log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (str(obj.id), str(obj.guid))) log.info("harvest_object_id: %s, GUID: %s successfully gathered " % (
str(obj.id), str(obj.guid)))
used_identifiers.append(guid) used_identifiers.append(guid)
ids.append(obj.id) ids.append(obj.id)
except Exception as e: except Exception as e:
self._save_gather_error( self._save_gather_error(
'Statistik-Nord-Harvester: Error gathering the identifiers from the source server [%s]' % str(e), 'Kiel-Harvester: Error gathering the identifiers from the source server [%s]' % str(
e),
harvest_job) harvest_job)
log.error(e) log.error(e)
return None return None
...@@ -68,7 +71,8 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -68,7 +71,8 @@ class KielHarvester(ODSHBaseHarvester):
return ids return ids
else: else:
log.error("No records received") log.error("No records received")
self._save_gather_error("Couldn't find any metadata files", harvest_job) self._save_gather_error(
"Couldn't find any metadata files", harvest_job)
return None return None
@staticmethod @staticmethod
...@@ -79,6 +83,7 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -79,6 +83,7 @@ class KielHarvester(ODSHBaseHarvester):
return False return False
def import_stage(self, harvest_object): def import_stage(self, harvest_object):
log.debug('IMPORT')
context = { context = {
'model': model, 'model': model,
'session': model.Session, 'session': model.Session,
...@@ -89,12 +94,14 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -89,12 +94,14 @@ class KielHarvester(ODSHBaseHarvester):
return False return False
if harvest_object.content is None: if harvest_object.content is None:
self._save_object_error('Empty content for object %s' % harvest_object.id, harvest_object, 'Import') self._save_object_error(
'Empty content for object %s' % harvest_object.id, harvest_object, 'Import')
return False return False
else: else:
package_dict = json.loads(harvest_object.content) package_dict = json.loads(harvest_object.content)
source_dataset = get_action('package_show')(context.copy(), {'id': harvest_object.source.id}) source_dataset = get_action('package_show')(
context.copy(), {'id': harvest_object.source.id})
package_dict['owner_org'] = source_dataset.get('owner_org') package_dict['owner_org'] = source_dataset.get('owner_org')
if package_dict['type'] == 'datensatz': if package_dict['type'] == 'datensatz':
...@@ -115,25 +122,35 @@ class KielHarvester(ODSHBaseHarvester): ...@@ -115,25 +122,35 @@ class KielHarvester(ODSHBaseHarvester):
package_dict['groups'] = mapped_groups package_dict['groups'] = mapped_groups
extras = package_dict['extras'] extras = package_dict['extras']
package_dict['extras'] = list() new_extras = list()
for extra in extras: for extra in extras:
if extra['key'] in ['temporal_start', 'temporal_end', 'issued']: if extra['key'] in ['temporal_start', 'temporal_end', 'issued']:
package_dict[extra['key']] = extra['value'] new_extras.append(extra)
new_extras.append(
{'spatial_uri': 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002'})
package_dict['extras'] = new_extras
package_dict['spatial_uri'] = 'http://dcat-ap.de/def/politicalGeocoding/districtKey/01002' log.debug(package_dict['extras'])
license_id = self._get_license_id(package_dict['license_id']) license_id = self._get_license_id(package_dict['license_id'])
if license_id: if license_id:
package_dict['license_id'] = license_id package_dict['license_id'] = license_id
else: else:
log.error('invalid license_id: %s' % package_dict['license_id']) log.error('invalid license_id: %s' %
self._save_object_error('Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import') package_dict['license_id'])
self._save_object_error(
'Invalid license_id: %s' % package_dict['license_id'], harvest_object, 'Import')
return False return False
try: try:
context = {'user': self._get_user_name(), 'return_id_only': True, 'ignore_auth': True} context = {'user': self._get_user_name(
package_plugin = lib_plugins.lookup_package_plugin(package_dict.get('type', None)) ), 'return_id_only': True, 'ignore_auth': True}
package_plugin = lib_plugins.lookup_package_plugin(
package_dict.get('type', None))
package_schema = package_plugin.create_package_schema() package_schema = package_plugin.create_package_schema()
context['schema'] = package_schema context['schema'] = package_schema
log.debug(package_schema)
self._handle_current_harvest_object(harvest_object, harvest_object.guid) self._handle_current_harvest_object(harvest_object, harvest_object.guid)
result = toolkit.get_action('package_create')(context, package_dict) result = toolkit.get_action('package_create')(context, package_dict)
return result return result
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment