Skip to content
Snippets Groups Projects
Verified Commit 2e7c7f0d authored by Thorge Petersen's avatar Thorge Petersen
Browse files

fix: recalculate file attributes (size, hash) on resource update

parent 7a00c063
Branches fix-adding-datasets-for-users-and-editors
Tags
1 merge request: !55 Resolve "File Hash Not Recalculated During Resource Update"
......@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Corrected handling of file hash and size updates so that they are properly calculated during resource updates. Added enhanced error handling for missing files and for processing failures during file attribute updates.
## [2.4.7] - 2025-01-20
### Added
......
......@@ -279,13 +279,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
self._update_is_new_in_pkg_dict(pkg_dict)
return pkg_dict
def after_dataset_create(self, context, resource):
    """Delegate to ``tools.add_attributes_resources`` when a package id is set.

    :param context: passed through unchanged to the helper.
    :param resource: dict-like object; nothing happens unless its
        ``'package_id'`` entry is truthy.
    """
    if not resource.get('package_id'):
        return
    tools.add_attributes_resources(context, resource)
# Hook that emits a debug log entry naming itself; in the lines shown here the
# context and resource arguments are not used.
# NOTE(review): this listing comes from a rendered diff, so further body lines
# of this hook may appear elsewhere in the listing - confirm against the real file.
def after_resource_create(self, context, resource):
log.debug('after_resource_create')
# Hook taking (context, resource). As listed, it checks the dict's
# 'package_id' before calling tools.add_attributes_resources, and checks for
# url_type == 'upload' before calling tools.add_resource_attributes.
# NOTE(review): this listing comes from a rendered diff without +/- markers, so
# these lines may mix the removed and the added version of the hook, and the
# nesting of the two conditions cannot be read from here - confirm against the
# real file before relying on this description.
def after_dataset_update(self, context, resource):
if resource.get('package_id'):
tools.add_attributes_resources(context, resource)
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
def after_resource_update(self, context, resource):
    """Log the hook call and refresh file attributes of uploaded resources.

    :param context: passed through unchanged to the helper.
    :param resource: dict-like object; the helper is only called when its
        ``'url_type'`` entry equals ``'upload'``.
    """
    log.debug('after_resource_update')
    is_upload = resource.get('url_type') == 'upload'
    if not is_upload:
        return
    tools.add_resource_attributes(context, resource)
@staticmethod
def _update_is_new_in_pkg_dict(pkg_dict):
......
import os
import logging
from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
from ckanext.odsh.lib.uploader import calculate_hash
import ckan.plugins.toolkit as toolkit
#import magic
#import pdftotext
def add_attributes_resources(context, resource):
    """Fill in size, hash and hash algorithm for *resource* inside its package.

    The package is loaded with ``package_show``, the entry whose ``'id'``
    equals the given resource's id is amended from the file found at
    ``get_resource_path(resource)`` (if that path exists), and the package is
    written back with ``package_update`` in every case.

    :param context: passed to both action calls.
    :param resource: dict providing ``'package_id'`` and ``'id'``.
    """
    show_package = toolkit.get_action('package_show')
    package = show_package(context, {'id': resource.get('package_id')})
    resources = package.get('resources')
    wanted_id = resource.get('id')

    for entry in resources:
        if entry.get('id') != wanted_id:
            continue

        path = get_resource_path(resource)
        if os.path.exists(path):
            with open(path, 'rb') as file:
                # size: only filled in when the entry has none yet
                if not entry.get('size'):
                    entry.update({'size': os.path.getsize(path)})

                # hash: always recomputed from the start of the file
                file.seek(0)
                digest = calculate_hash(file)
                entry.update({'hash': digest})

                # hash algorithm
                entry.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})

                # number of pages (disabled)
                # file_type = magic.from_file(path, mime = True)
                # if file_type == 'application/pdf':
                # file.seek(0)
                # pdf = pdftotext.PDF(file)
                # number_of_pages = len(pdf)
                # item.update({'number_of_pages':number_of_pages})

        # ``entry`` is the very dict stored in ``resources``, so the changes
        # above are already in the list; stop at the matching entry.
        break

    package.update({'resources': resources})
    toolkit.get_action('package_update')(context, package)
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
def add_resource_attributes(context, resource):
    """Recompute the stored file's hash (and size, if unset) and save them.

    The file is located with ``get_resource_path(resource)``. Its hash is
    always recalculated; its size is only filled in when the resource has no
    truthy ``'size'`` yet. The amended resource is then persisted through the
    ``resource_update`` action.

    :param context: action context; also carries the ``'resource_processed'``
        flag used as a guard against running twice.
    :param resource: resource dict; it is modified in place.
    :raises: whatever ``toolkit.abort`` raises - 404 when the file is missing,
        500 when reading or hashing the file fails. Errors raised by
        ``resource_update`` itself propagate unchanged.
    """
    log.debug("add_resource_attributes")

    # Skip when this context has already been handled.
    # NOTE(review): presumably this stops the ``resource_update`` call below
    # from re-triggering this function through the update hook - confirm.
    # The flag is never cleared, so a context reused for another resource
    # would be skipped as well.
    if context.get('resource_processed', False):
        log.debug("Resource already processed for this cycle, skipping.")
        return

    # Nothing to do for a resource that has no package_id.
    if not resource.get('package_id', False):
        return

    # Mark as processed in the context (not on the resource itself).
    context['resource_processed'] = True

    path = get_resource_path(resource)

    # The path must exist and be a regular file.
    if not os.path.isfile(path):
        toolkit.abort(404, f"File not found: {path}")
        return

    # Only reading and hashing the file are guarded here, so that failures of
    # the update action further down are not misreported as file errors.
    try:
        with open(path, 'rb') as file:
            # Size is only filled in when not already present.
            if not resource.get('size'):
                resource['size'] = os.path.getsize(path)

            file.seek(0)  # hash from the beginning of the file
            file_hash = calculate_hash(file)
    except Exception as e:
        # Keep the traceback in the log; the abort message only carries str(e).
        log.exception("Error processing resource at %s", path)
        toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
        return

    resource['hash'] = file_hash
    resource['hash_algorithm'] = 'http://dcat-ap.de/def/hashAlgorithms/md/5'

    # Persist after the file handle has been closed.
    toolkit.get_action('resource_update')(context, resource)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment