Skip to content
Snippets Groups Projects
Verified Commit 2e7c7f0d authored by Thorge Petersen
Browse files

fix: recalculate file attributes (size, hash) on resource update

parent 7a00c063
Branches
Tags
1 merge request: !55 Resolve "File Hash Not Recalculated During Resource Update"
...@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. ...@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Corrected handling of file hash and size updates so that they are calculated properly during resource updates. Added enhanced error handling for missing files and for processing failures during file attribute updates.
## [2.4.7] - 2025-01-20 ## [2.4.7] - 2025-01-20
### Added ### Added
......
...@@ -279,13 +279,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm): ...@@ -279,13 +279,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
self._update_is_new_in_pkg_dict(pkg_dict) self._update_is_new_in_pkg_dict(pkg_dict)
return pkg_dict return pkg_dict
def after_dataset_create(self, context, resource): def after_resource_create(self, context, resource):
if resource.get('package_id'): log.debug('after_resource_create')
tools.add_attributes_resources(context, resource)
def after_dataset_update(self, context, resource): if resource.get('url_type') == 'upload':
if resource.get('package_id'): tools.add_resource_attributes(context, resource)
tools.add_attributes_resources(context, resource)
def after_resource_update(self, context, resource):
log.debug('after_resource_update')
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
@staticmethod @staticmethod
def _update_is_new_in_pkg_dict(pkg_dict): def _update_is_new_in_pkg_dict(pkg_dict):
......
import os import os
import logging
from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
from ckanext.odsh.lib.uploader import calculate_hash from ckanext.odsh.lib.uploader import calculate_hash
import ckan.plugins.toolkit as toolkit import ckan.plugins.toolkit as toolkit
#import magic
#import pdftotext log = logging.getLogger(__name__)
def add_attributes_resources(context, resource): def add_resource_attributes(context, resource):
package_id = resource.get('package_id') log.debug("add_resource_attributes")
package = toolkit.get_action('package_show')(context, {'id': package_id})
resources = package.get('resources') # Check if the resource is already processed for this cycle
i = 0 if context.get('resource_processed', False):
for item in resources: log.debug("Resource already processed for this cycle, skipping.")
if item.get('id') == resource.get('id'): return
path = get_resource_path(resource)
if os.path.exists(path): # Return if the resource has no package_id
with open(path, 'rb') as file: if not resource.get('package_id', False):
return
#size
if not item.get('size'): # Mark the resource as processed for this cycle (in context, not on the resource)
resource_size = os.path.getsize(path) context['resource_processed'] = True
item.update({'size': resource_size})
path = get_resource_path(resource)
#hash
file.seek(0) # Check if the path exists and is a file
hash = calculate_hash(file) if os.path.isfile(path):
item.update({'hash':hash}) try:
with open(path, 'rb') as file:
#hash algorithm # Calculate and update file size if not already present
item.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'}) if not resource.get('size'):
resource_size = os.path.getsize(path)
resource.update({'size': resource_size})
#number of pages
# file_type = magic.from_file(path, mime = True) # Calculate and update file hash
# if file_type == 'application/pdf': file.seek(0) # Ensure we're at the beginning of the file
# file.seek(0) hash = calculate_hash(file)
# pdf = pdftotext.PDF(file) resource.update({'hash': hash})
# number_of_pages = len(pdf)
# item.update({'number_of_pages':number_of_pages}) # Specify hash algorithm
resource.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})
resources[i] = item
break # Update the resource in the system
i = i + 1 toolkit.get_action('resource_update')(context, resource)
package.update({'resources':resources})
toolkit.get_action('package_update')(context, package) except Exception as e:
# Handle exceptions that might occur during file reading or hash calculation
toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
else:
toolkit.abort(404, f"File not found: {path}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment