Skip to content
Snippets Groups Projects
Verified commit 2e7c7f0d, authored by Thorge Petersen
Browse files

fix: recalculate file attributes (size, hash) on resource update

parent 7a00c063
Branches
Tags
1 merge request: !55 Resolve "File Hash Not Recalculated During Resource Update"
......@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Corrected the handling of file hash and size so that they are properly calculated during resource updates, and added error handling for missing files and for processing failures during file attribute updates.
## [2.4.7] - 2025-01-20
### Added
......
......@@ -279,13 +279,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
self._update_is_new_in_pkg_dict(pkg_dict)
return pkg_dict
def after_dataset_create(self, context, resource):
if resource.get('package_id'):
tools.add_attributes_resources(context, resource)
def after_resource_create(self, context, resource):
log.debug('after_resource_create')
def after_dataset_update(self, context, resource):
if resource.get('package_id'):
tools.add_attributes_resources(context, resource)
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
def after_resource_update(self, context, resource):
log.debug('after_resource_update')
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
@staticmethod
def _update_is_new_in_pkg_dict(pkg_dict):
......
import os
import logging
from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
from ckanext.odsh.lib.uploader import calculate_hash
import ckan.plugins.toolkit as toolkit
#import magic
#import pdftotext
def add_attributes_resources(context, resource):
package_id = resource.get('package_id')
package = toolkit.get_action('package_show')(context, {'id': package_id})
resources = package.get('resources')
i = 0
for item in resources:
if item.get('id') == resource.get('id'):
log = logging.getLogger(__name__)
def add_resource_attributes(context, resource):
log.debug("add_resource_attributes")
# Check if the resource is already processed for this cycle
if context.get('resource_processed', False):
log.debug("Resource already processed for this cycle, skipping.")
return
# Return if the resource has no package_id
if not resource.get('package_id', False):
return
# Mark the resource as processed for this cycle (in context, not on the resource)
context['resource_processed'] = True
path = get_resource_path(resource)
if os.path.exists(path):
with open(path, 'rb') as file:
#size
if not item.get('size'):
# Check if the path exists and is a file
if os.path.isfile(path):
try:
with open(path, 'rb') as file:
# Calculate and update file size if not already present
if not resource.get('size'):
resource_size = os.path.getsize(path)
item.update({'size': resource_size})
resource.update({'size': resource_size})
#hash
file.seek(0)
# Calculate and update file hash
file.seek(0) # Ensure we're at the beginning of the file
hash = calculate_hash(file)
item.update({'hash':hash})
#hash algorithm
item.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})
resource.update({'hash': hash})
# Specify hash algorithm
resource.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})
#number of pages
# file_type = magic.from_file(path, mime = True)
# if file_type == 'application/pdf':
# file.seek(0)
# pdf = pdftotext.PDF(file)
# number_of_pages = len(pdf)
# item.update({'number_of_pages':number_of_pages})
# Update the resource in the system
toolkit.get_action('resource_update')(context, resource)
resources[i] = item
break
i = i + 1
package.update({'resources':resources})
toolkit.get_action('package_update')(context, package)
except Exception as e:
# Handle exceptions that might occur during file reading or hash calculation
toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
else:
toolkit.abort(404, f"File not found: {path}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment