Newer
Older
import logging
from ckan.common import config
log = logging.getLogger(__name__)
def add_resource_attributes(context, resource):
    """Compute size and hash for a resource's stored file and save them.

    The file is located with ``_get_resource_path``. When it exists, the
    resource dict gets ``size`` (only if it has no truthy ``size`` yet),
    ``hash`` and ``hash_algorithm``, and is then passed to the
    ``resource_update`` action together with the same ``context``.

    Nothing happens when ``context`` already carries a truthy
    ``'resource_processed'`` entry or when ``resource`` has no truthy
    ``'package_id'``.

    Args:
        context: Mapping passed on to ``resource_update``; the key
            ``'resource_processed'`` is set to ``True`` on it.
        resource: Resource dict; updated in place on success.

    Returns:
        None.

    Raises:
        Whatever ``toolkit.abort`` raises: status 404 when the computed path
        is not a regular file, status 500 when reading the size or hashing
        the file fails. Errors raised by the ``resource_update`` action
        itself are not converted and propagate unchanged.
    """
    log.debug("add_resource_attributes")

    # Check if the resource is already processed for this cycle
    if context.get('resource_processed', False):
        log.debug("Resource already processed for this cycle, skipping.")
        return

    # Return if the resource has no package_id
    if not resource.get('package_id', False):
        return

    # Mark the resource as processed for this cycle (in context, not on the
    # resource). The flag is set before ``resource_update`` is called with
    # this same context further down.
    # NOTE(review): presumably this stops a re-entrant call triggered by that
    # update from processing the resource again - confirm against the caller.
    context['resource_processed'] = True

    path = _get_resource_path(resource)

    # Check if the path exists and is a file
    if not os.path.isfile(path):
        toolkit.abort(404, f"File not found: {path}")
        return

    new_values = {}
    try:
        # Only file access and hashing are guarded here, so that failures of
        # the update action below are not reported as file-processing errors.
        with open(path, 'rb') as file:
            file_hash = calculate_hash(file)
        # Calculate file size only if not already present
        if not resource.get('size'):
            new_values['size'] = os.path.getsize(path)
    except Exception as e:
        # Keep the traceback in the log; the abort message only carries str(e).
        log.exception("Error processing resource at %s", path)
        toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
    else:
        new_values['hash'] = file_hash
        # Specify hash algorithm
        # NOTE(review): the URI names MD5 - confirm calculate_hash produces MD5.
        new_values['hash_algorithm'] = 'http://dcat-ap.de/def/hashAlgorithms/md/5'
        # The resource is only touched once everything was computed, so a
        # failure above never leaves it partially updated.
        resource.update(new_values)

        # Update the resource in the system (file handle is already closed)
        toolkit.get_action('resource_update')(context, resource)
def _get_resource_path(resource):
    """Return the path built from the storage setting and the resource id.

    The result is
    ``<ckan.storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>``, where
    ``ckan.storage_path`` is read from ``config`` and ``id`` from
    ``resource``. No check is made that the path exists.
    """
    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
    rid = resource.get('id')
    storage_root = config.get('ckan.storage_path')
    # The id is split into two three-character directory levels; the rest of
    # the id is the file name.
    id_parts = (rid[:3], rid[3:6], rid[6:])
    return os.path.join(storage_root, 'resources', *id_parts)