From 592482877f85592888aab1b486bad786f714abf1 Mon Sep 17 00:00:00 2001
From: anonymous <anonymous>
Date: Fri, 17 May 2019 10:26:07 +0200
Subject: [PATCH] Do not treat already-known datasets as an error in the Kiel
 and Statistikamt Nord harvesters

---
 ckanext/odsh/harvesters/kielharvester.py             | 7 +++++++
 ckanext/odsh/harvesters/statistikamtnordharvester.py | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/ckanext/odsh/harvesters/kielharvester.py b/ckanext/odsh/harvesters/kielharvester.py
index 8ab1de94..55a7317e 100755
--- a/ckanext/odsh/harvesters/kielharvester.py
+++ b/ckanext/odsh/harvesters/kielharvester.py
@@ -35,6 +35,7 @@ class KielHarvester(ODSHBaseHarvester):
     def gather_stage(self, harvest_job):
         url = harvest_job.source.url
         datasets = requests.get(url=url).json()
+        count_known_dataset_ids = 0
 
         try:
             used_identifiers = []
@@ -54,6 +55,8 @@ class KielHarvester(ODSHBaseHarvester):
                         str(obj.id), str(obj.guid)))
                     used_identifiers.append(guid)
                     ids.append(obj.id)
+                else:
+                    count_known_dataset_ids += 1
 
         except Exception as e:
             self._save_gather_error(
@@ -69,6 +72,10 @@ class KielHarvester(ODSHBaseHarvester):
             log.debug("List of gathered IDs: %s" % ids)
             log.debug("gather_stage() finished: %s IDs gathered" % len(ids))
             return ids
+        elif count_known_dataset_ids > 0:
+            log.info("Gathered " + str(count_known_dataset_ids) + 
+              " datasets already stored in the database. No new datasets found.")
+            return []
         else:
             log.error("No records received")
             self._save_gather_error(
diff --git a/ckanext/odsh/harvesters/statistikamtnordharvester.py b/ckanext/odsh/harvesters/statistikamtnordharvester.py
index e7225849..50a082a6 100755
--- a/ckanext/odsh/harvesters/statistikamtnordharvester.py
+++ b/ckanext/odsh/harvesters/statistikamtnordharvester.py
@@ -37,6 +37,7 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
 
     def gather_stage(self, harvest_job):
         url = harvest_job.source.url
+        count_known_dataset_ids = 0
 
         try:
             log.info('Stat_Nord_Harvester: Beginning gather stage')
@@ -79,6 +80,8 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
                         used_identifiers.append(identifier)
                         ids.append(obj.id)
                         log.debug('Save identifier %s from Statistik Nord' % identifier)
+                    else:
+                        count_known_dataset_ids += 1
 
                 except Exception, e:
                     log.error('traceback: %s' % traceback.format_exc())
@@ -97,6 +100,10 @@ class StatistikamtNordHarvester(ODSHBaseHarvester):
             log.info("finished %s IDs of %s IDs successfully gathered" % (len(used_identifiers), len(documents)))
             log.debug("gather_stage() finished: %s IDs gathered" % len(ids))
             return ids
+        elif count_known_dataset_ids > 0:
+            log.info("Gathered " + str(count_known_dataset_ids) + 
+              " datasets already stored in the database. No new datasets found.")
+            return []
         else:
             log.error("No records received")
             self._save_gather_error("Couldn't find any metadata files", harvest_job)
-- 
GitLab