diff --git a/ckanext/odsh/harvesters/kielharvester.py b/ckanext/odsh/harvesters/kielharvester.py index 8ab1de94ceb684d3b23e7e3d41fc21063adf1c74..55a7317e8bd51ddbb5b411545ee111fe1f9d1694 100755 --- a/ckanext/odsh/harvesters/kielharvester.py +++ b/ckanext/odsh/harvesters/kielharvester.py @@ -35,6 +35,7 @@ class KielHarvester(ODSHBaseHarvester): def gather_stage(self, harvest_job): url = harvest_job.source.url datasets = requests.get(url=url).json() + count_known_dataset_ids = 0 try: used_identifiers = [] @@ -54,6 +55,8 @@ class KielHarvester(ODSHBaseHarvester): str(obj.id), str(obj.guid))) used_identifiers.append(guid) ids.append(obj.id) + else: + count_known_dataset_ids += 1 except Exception as e: self._save_gather_error( @@ -69,6 +72,10 @@ class KielHarvester(ODSHBaseHarvester): log.debug("List of gathered IDs: %s" % ids) log.debug("gather_stage() finished: %s IDs gathered" % len(ids)) return ids + elif count_known_dataset_ids > 0: + log.info("Gathered " + str(count_known_dataset_ids) + + " datasets already stored in the database. No new datasets found.") + return [] else: log.error("No records received") self._save_gather_error( diff --git a/ckanext/odsh/harvesters/statistikamtnordharvester.py b/ckanext/odsh/harvesters/statistikamtnordharvester.py index e722584922aa68aedae6c0527a90d832ec79eab3..50a082a66b9796a6d3c5c93f89644d851b93d486 100755 --- a/ckanext/odsh/harvesters/statistikamtnordharvester.py +++ b/ckanext/odsh/harvesters/statistikamtnordharvester.py @@ -37,6 +37,7 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): def gather_stage(self, harvest_job): url = harvest_job.source.url + count_known_dataset_ids = 0 try: log.info('Stat_Nord_Harvester: Beginning gather stage') @@ -79,6 +80,8 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): used_identifiers.append(identifier) ids.append(obj.id) log.debug('Save identifier %s from Statistik Nord' % identifier) + else: + count_known_dataset_ids += 1 except Exception, e: log.error('traceback: %s' % traceback.format_exc()) @@ -97,6 +100,10 @@ class StatistikamtNordHarvester(ODSHBaseHarvester): log.info("finished %s IDs of %s IDs successfully gathered" % (len(used_identifiers), len(documents))) log.debug("gather_stage() finished: %s IDs gathered" % len(ids)) return ids + elif count_known_dataset_ids > 0: + log.info("Gathered " + str(count_known_dataset_ids) + + " datasets already stored in the database. No new datasets found.") + return [] else: log.error("No records received") self._save_gather_error("Couldn't find any metadata files", harvest_job)