Commit 59248287 authored by anonymous

Ignoring already-known datasets is not an error for the Kiel and StatNord harvesters

parent b2072135
@@ -35,6 +35,7 @@ class KielHarvester(ODSHBaseHarvester):
     def gather_stage(self, harvest_job):
         url = harvest_job.source.url
         datasets = requests.get(url=url).json()
+        count_known_dataset_ids = 0
         try:
             used_identifiers = []
@@ -54,6 +55,8 @@ class KielHarvester(ODSHBaseHarvester):
                         str(obj.id), str(obj.guid)))
                     used_identifiers.append(guid)
                     ids.append(obj.id)
+                else:
+                    count_known_dataset_ids += 1
         except Exception as e:
             self._save_gather_error(
@@ -69,6 +72,10 @@ class KielHarvester(ODSHBaseHarvester):
             log.debug("List of gathered IDs: %s" % ids)
             log.debug("gather_stage() finished: %s IDs gathered" % len(ids))
             return ids
+        elif count_known_dataset_ids > 0:
+            log.info("Gathered " + str(count_known_dataset_ids) +
+                     " datasets already stored in the database. No new datasets found.")
+            return []
         else:
             log.error("No records received")
             self._save_gather_error(
...
@@ -37,6 +37,7 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
     def gather_stage(self, harvest_job):
         url = harvest_job.source.url
+        count_known_dataset_ids = 0
         try:
             log.info('Stat_Nord_Harvester: Beginning gather stage')
@@ -79,6 +80,8 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
                     used_identifiers.append(identifier)
                     ids.append(obj.id)
                     log.debug('Save identifier %s from Statistik Nord' % identifier)
+                else:
+                    count_known_dataset_ids += 1
         except Exception, e:
             log.error('traceback: %s' % traceback.format_exc())
@@ -97,6 +100,10 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
             log.info("finished %s IDs of %s IDs successfully gathered" % (len(used_identifiers), len(documents)))
             log.debug("gather_stage() finished: %s IDs gathered" % len(ids))
             return ids
+        elif count_known_dataset_ids > 0:
+            log.info("Gathered " + str(count_known_dataset_ids) +
+                     " datasets already stored in the database. No new datasets found.")
+            return []
         else:
             log.error("No records received")
             self._save_gather_error("Couldn't find any metadata files", harvest_job)
...
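
Both hunks apply the same pattern: datasets whose identifier is already stored are counted rather than queued, and a gather run that yields only already-known datasets now logs the count and returns an empty list instead of recording a gather error. Below is a minimal, self-contained sketch of that pattern; the names gather_stage_sketch, fetched_datasets, known_guids and save_gather_error are hypothetical stand-ins for the harvester internals, not part of the actual code.

import logging

log = logging.getLogger(__name__)


def gather_stage_sketch(fetched_datasets, known_guids, save_gather_error):
    # Simplified version of the shared gather-stage logic (hypothetical names).
    # fetched_datasets: iterable of dicts with an 'id' key, as returned by the source.
    # known_guids: set of GUIDs already stored in the database.
    # save_gather_error: stand-in for self._save_gather_error.
    ids = []
    count_known_dataset_ids = 0
    for dataset in fetched_datasets:
        guid = dataset['id']
        if guid not in known_guids:
            ids.append(guid)                # new dataset: queue it for fetching
        else:
            count_known_dataset_ids += 1    # already harvested: only count it
    if ids:
        log.debug("gather_stage() finished: %s IDs gathered" % len(ids))
        return ids
    elif count_known_dataset_ids > 0:
        # Only known datasets came back from the source: not an error.
        log.info("Gathered %s datasets already stored in the database. "
                 "No new datasets found." % count_known_dataset_ids)
        return []
    else:
        # Nothing came back at all: still treated as a gather error.
        log.error("No records received")
        save_gather_error("Couldn't find any metadata files")
        return None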