diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9895eb16b4f7a5b321865421c3aeda24a6eae470..03c4b99d6dc9408d7a54eb54a702de1d738fd79e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+
+- Corrected handling of file hash and size updates to ensure they are properly calculated during resource updates and added enhanced error handling for missing files or processing failures during file attribute updates.
+
 ## [2.4.7] - 2025-01-20
 
 ### Added
diff --git a/ckanext/odsh/plugin.py b/ckanext/odsh/plugin.py
index 47f2a9a60a395b39fc207d0b7ce607f460784d3e..f0e70e3d678617c5e7d3d322e4a8317bc3a14bb9 100644
--- a/ckanext/odsh/plugin.py
+++ b/ckanext/odsh/plugin.py
@@ -279,13 +279,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
         self._update_is_new_in_pkg_dict(pkg_dict)
         return pkg_dict
 
-    def after_dataset_create(self, context, resource):
-        if resource.get('package_id'):
-            tools.add_attributes_resources(context, resource)
+    def after_resource_create(self, context, resource):
+        log.debug('after_resource_create')
+
-
-    def after_dataset_update(self, context, resource):
-        if resource.get('package_id'):
-            tools.add_attributes_resources(context, resource)
+        if resource.get('url_type') == 'upload':
+            tools.add_resource_attributes(context, resource)
+
+    def after_resource_update(self, context, resource):
+        log.debug('after_resource_update')
+
+        if resource.get('url_type') == 'upload':
+            tools.add_resource_attributes(context, resource)
 
     @staticmethod
     def _update_is_new_in_pkg_dict(pkg_dict):
diff --git a/ckanext/odsh/tools.py b/ckanext/odsh/tools.py
index 63a45750be357ee61373e13d3f05f17ad1d811d3..b3cbd4413d6d58a8175209379193c190048db0f7 100644
--- a/ckanext/odsh/tools.py
+++ b/ckanext/odsh/tools.py
@@ -1,45 +1,71 @@
 import os
+import logging
 from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
 from ckanext.odsh.lib.uploader import calculate_hash
 import ckan.plugins.toolkit as toolkit
-#import magic
-#import pdftotext
-
-def add_attributes_resources(context, resource):
-    package_id = resource.get('package_id')
-    package = toolkit.get_action('package_show')(context, {'id': package_id})
-    resources = package.get('resources')
-    i = 0
-    for item in resources:
-        if item.get('id') == resource.get('id'):
-            path = get_resource_path(resource)
-            if os.path.exists(path):
-                with open(path, 'rb') as file:
-
-                    #size
-                    if not item.get('size'):
-                        resource_size = os.path.getsize(path)
-                        item.update({'size': resource_size})
-
-                    #hash
-                    file.seek(0)
-                    hash = calculate_hash(file)
-                    item.update({'hash':hash})
-
-                    #hash algorithm
-                    item.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})
-
-
-                    #number of pages
-#                    file_type = magic.from_file(path, mime = True)
-#                    if file_type == 'application/pdf':
-#                        file.seek(0)
-#                        pdf = pdftotext.PDF(file)
-#                        number_of_pages = len(pdf)
-#                        item.update({'number_of_pages':number_of_pages})
-
-                resources[i] = item
-                break
-        i = i + 1
-    package.update({'resources':resources})
-    toolkit.get_action('package_update')(context, package)
+
+log = logging.getLogger(__name__)
+
+def add_resource_attributes(context, resource):
+    """Add size, hash and hash algorithm of an uploaded file to its resource.
+
+    The file behind ``resource`` is located with ``get_resource_path`` and
+    hashed with ``calculate_hash``. ``hash`` and ``hash_algorithm`` are always
+    set; ``size`` is only filled in when the resource has none. The values
+    are written to ``resource`` in place and saved via ``resource_update``.
+
+    ``resource_update`` is expected to run the ``after_resource_update`` hook,
+    which calls this function again. The ``resource_processed`` key in
+    ``context`` marks that nested call so it returns without doing anything.
+
+    :param context: action context dict, passed on to ``resource_update``
+    :param resource: resource dict; ignored when it has no ``package_id``
+    :raises: the exception raised by ``toolkit.abort`` -- 404 when the file
+        does not exist, 500 when it cannot be read or hashed. Errors from
+        ``resource_update`` are not caught here.
+    """
+    log.debug("add_resource_attributes")
+
+    # Nested call caused by our own resource_update below: nothing to do.
+    if context.get('resource_processed', False):
+        log.debug("Resource already processed for this cycle, skipping.")
+        return
+
+    # Return if the resource has no package_id
+    if not resource.get('package_id', False):
+        return
+
+    path = get_resource_path(resource)
+
+    # toolkit.abort raises, so nothing below runs for a missing file
+    if not os.path.isfile(path):
+        toolkit.abort(404, f"File not found: {path}")
+
+    attributes = {'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'}
+
+    # Guard only the file access: an error from resource_update must not be
+    # reported as a file processing failure.
+    try:
+        with open(path, 'rb') as upload:
+            attributes['hash'] = calculate_hash(upload)
+        # Keep a size that is already present on the resource
+        if not resource.get('size'):
+            attributes['size'] = os.path.getsize(path)
+    except Exception as e:
+        log.exception("Error processing resource at %s", path)
+        toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
+
+    # Avoid a second write when the stored values are already current
+    if all(resource.get(key) == value for key, value in attributes.items()):
+        log.debug("Resource attributes already up to date, skipping update.")
+        return
+
+    resource.update(attributes)
+
+    # Set the marker only around the nested update and always clear it, so a
+    # context that is reused for another resource is not skipped.
+    context['resource_processed'] = True
+    try:
+        toolkit.get_action('resource_update')(context, resource)
+    finally:
+        context.pop('resource_processed', None)