From 592482877f85592888aab1b486bad786f714abf1 Mon Sep 17 00:00:00 2001 From: anonymous <anonymous> Date: Fri, 17 May 2019 10:26:07 +0200 Subject: [PATCH] Ignoring already known datasets is no error for kiel and statNord harvesters --- ckanext/odsh/harvesters/kielharvester.py | 7 +++++++ ckanext/odsh/harvesters/statistikamtnordharvester.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/ckanext/odsh/harvesters/kielharvester.py b/ckanext/odsh/harvesters/kielharvester.py index 8ab1de94..55a7317e 100755 --- a/ckanext/odsh/harvesters/kielharvester.py +++ b/ckanext/odsh/harvesters/kielharvester.py @@ -35,6 +35,7 @@ class KielHarvester(ODSHBaseHarvester): def gather_stage(self, harvest_job): url = harvest_job.source.url datasets = requests.get(url=url).json() + count_known_dataset_ids = 0 try: used_identifiers = [] @@ -54,6 +55,8 @@ class KielHarvester(ODSHBaseHarvester): str(obj.id), str(obj.guid))) used_identifiers.append(guid) ids.append(obj.id) + else: + count_known_dataset_ids += 1 except Exception as e: self._save_gather_error( @@ -69,6 +72,10 @@ class KielHarvester(ODSHBaseHarvester): log.debug("List of gathered IDs: %s" % ids) log.debug("gather_stage() finished: %s IDs gathered" % len(ids)) return ids + elif count_known_dataset_ids > 0: + log.info("Gathered " + str(count_known_dataset_ids) + + " datasets already stored in the database. No new datasets found.") + return [] else: log.error("No records received") self._save_gather_error( diff --git a/ckanext/odsh/harvesters/statistikamtnordharvester.py b/ckanext/odsh/harvesters/statistikamtnordharvester.py index e7225849..50a082a6 100755 --- a/ckanext/odsh/harvesters/statistikamtnordharvester.py +++ b/ckanext/odsh/harvesters/statistikamtnordharvester.py @@ -37,6 +37,7 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): def gather_stage(self, harvest_job): url = harvest_job.source.url + count_known_dataset_ids = 0 try: log.info('Stat_Nord_Harvester: Beginning gather stage') @@ -79,6 +80,8 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): used_identifiers.append(identifier) ids.append(obj.id) log.debug('Save identifier %s from Statistik Nord' % identifier) + else: + count_known_dataset_ids += 1 except Exception, e: log.error('traceback: %s' % traceback.format_exc()) @@ -97,6 +100,10 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): log.info("finished %s IDs of %s IDs successfully gathered" % (len(used_identifiers), len(documents))) log.debug("gather_stage() finished: %s IDs gathered" % len(ids)) return ids + elif count_known_dataset_ids > 0: + log.info("Gathered " + str(count_known_dataset_ids) + + " datasets already stored in the database. No new datasets found.") + return [] else: log.error("No records received") self._save_gather_error("Couldn't find any metadata files", harvest_job) -- GitLab