Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 10-eingabe-der-dct-accrualperiodicity-in-weboberflache
  • 47-aktuelle-resource-einer-collection-wird-nicht-mehr-gefunden
  • 71-migrate-custom-fields-to-ckanext-scheming
  • add-auth-subroute
  • add-author-maintainer-information
  • add-package-custom-fields
  • ckan-2.11.0
  • dev
  • fix-adding-datasets-for-users-and-editors
  • fix-inline-flex-btns
  • fix-known-spatial-uri-validation
  • master
  • py3
  • v1.3
  • 1.3.0
  • 1.4.0
  • 1.4.1
  • 1.4.2
  • 1.4.3
  • 2.0.0
  • 2.1.0
  • 2.2.0
  • 2.3.0
  • 2.3.1
  • 2.4.0
  • 2.4.1
  • 2.4.2
  • 2.4.3
  • 2.4.4
  • 2.4.5
  • 2.4.6
  • 2.4.7
  • 2.5.0
  • 2.5.1
34 results

Target

Select target project
  • opendata/ckanext-odsh
1 result
Select Git revision
  • 10-eingabe-der-dct-accrualperiodicity-in-weboberflache
  • 47-aktuelle-resource-einer-collection-wird-nicht-mehr-gefunden
  • 71-migrate-custom-fields-to-ckanext-scheming
  • add-auth-subroute
  • add-author-maintainer-information
  • add-package-custom-fields
  • ckan-2.11.0
  • dev
  • fix-adding-datasets-for-users-and-editors
  • fix-inline-flex-btns
  • fix-known-spatial-uri-validation
  • master
  • py3
  • v1.3
  • 1.3.0
  • 1.4.0
  • 1.4.1
  • 1.4.2
  • 1.4.3
  • 2.0.0
  • 2.1.0
  • 2.2.0
  • 2.3.0
  • 2.3.1
  • 2.4.0
  • 2.4.1
  • 2.4.2
  • 2.4.3
  • 2.4.4
  • 2.4.5
  • 2.4.6
  • 2.4.7
  • 2.5.0
  • 2.5.1
34 results
Show changes
Commits on Source (4)
......@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Corrected the handling of file hash and size updates so that both values are properly calculated during resource updates. Added error handling for missing files and for processing failures during file attribute updates.
### Removed
- Completely removed the legacy plugin responsible for creating thumbnails, including all associated code and functionality.
## [2.4.7] - 2025-01-20
### Added
......
# ckan
import ckan.plugins.toolkit as toolkit
import ckan.lib.helpers as helpers
from ckan.logic.action.update import package_update
from ckan.logic.action.delete import package_delete
from . import thumbnail
def before_package_delete(context, package_id_dict):
    """
    Replacement for the ``package_delete`` action.

    Looks the package up via ``package_show``; if ``package_delete`` access
    is granted for it, the package's thumbnail is removed first. The call is
    then handed over to CKAN's own ``package_delete`` and its result returned.
    """
    show_package = toolkit.get_action('package_show')
    dataset = show_package(context, package_id_dict)
    may_delete = helpers.check_access('package_delete', dataset)
    if may_delete:
        thumbnail.remove_thumbnail(context)
    return package_delete(context, package_id_dict)
def before_package_update(context, pkg_dict):
    """
    Replacement for the ``package_update`` action that keeps the thumbnail
    extra of a package intact.

    If the stored package has a thumbnail and access is granted:

    * when the ``private`` flag changes, the thumbnail file is renamed to a
      fresh random name and the new name is written to the extras;
    * otherwise, when the incoming dict carries no ``thumbnail``, the stored
      filename is copied into the extras so it is not dropped by the update.

    The call is always delegated to CKAN's own ``package_update`` at the end,
    and its result is returned.
    """
    if helpers.check_access('package_update', pkg_dict):
        package_id = pkg_dict.get('id')
        package = toolkit.get_action('package_show')(context, {'id': package_id})
        old_private = package.get('private')
        new_private = pkg_dict.get('private')
        old_filename = package.get('thumbnail')
        if old_filename:
            # Compared as strings because the two values may arrive in
            # different representations (e.g. bool vs. 'True').
            if str(old_private) != str(new_private):
                new_filename = thumbnail.rename_thumbnail_to_random_name(old_filename)
                # setdefault: the incoming dict is not guaranteed to carry an
                # 'extras' list, and a bare pkg_dict['extras'] would raise.
                pkg_dict.setdefault('extras', []).append(
                    {'key': 'thumbnail', 'value': new_filename}
                )
            elif not pkg_dict.get('thumbnail'):
                pkg_dict.setdefault('extras', []).append(
                    {'key': 'thumbnail', 'value': old_filename}
                )
    return package_update(context, pkg_dict)
\ No newline at end of file
from ckan.lib.helpers import is_url, url_for
def thumbnail_namespace(filename):
    """Template helper: return *filename* prefixed with a forward slash."""
    prefix = "/"
    return prefix + filename
def get_download_link_for_thumbnail(package):
    """
    Template helper: return the download URL of the PDF behind a thumbnail.

    The package's resources are scanned from last to first; the first one
    that is an upload (``url_type == 'upload'``) with mimetype
    ``application/pdf`` wins. If its ``url`` is already a full URL it is
    returned as is, otherwise a qualified ``dataset.resource_download`` URL
    is built for it.

    Returns ``None`` when the package has no resources or none of them is an
    uploaded PDF.
    """
    # A package dict without (or with an empty/None) 'resources' entry must
    # not crash the template; treat it as "no resources".
    resources = package.get('resources') or []
    for resource in reversed(resources):
        is_upload = resource.get('url_type') == 'upload'
        is_pdf = resource.get('mimetype') == 'application/pdf'
        if not (is_upload and is_pdf):
            continue
        pre_resource_url = resource.get('url')
        if is_url(pre_resource_url):
            return pre_resource_url
        return url_for(named_route='dataset.resource_download',
                       id=resource.get('package_id'),
                       resource_id=resource.get('id'),
                       filename=pre_resource_url,
                       qualified=True)
    return None
import os
#from ckan
import ckan.plugins as plugins
#pdf_to_thumbnail
from . import thumbnail
from . import action as thumbnail_action
from . import helpers as thumbnail_helpers
import logging
log = logging.getLogger(__name__)
class ThumbnailPlugin(plugins.SingletonPlugin):
    """
    CKAN plugin that hooks thumbnail handling into resource events,
    configuration, actions and template helpers.
    """
    plugins.implements(plugins.IResourceController, inherit=True)
    plugins.implements(plugins.IConfigurer, inherit=True)
    plugins.implements(plugins.IActions, inherit=True)
    plugins.implements(plugins.ITemplateHelpers)

    # IResourceController

    def after_resource_create(self, context, resource):
        """Create a thumbnail for the resource's package if it has none."""
        siblings = thumbnail.resources_of_containing_package(resource)
        thumbnail.create_thumbnail_if_none_in_package(context, siblings)

    def after_resource_update(self, context, resource):
        """Create a thumbnail for the resource's package if it has none."""
        siblings = thumbnail.resources_of_containing_package(resource)
        thumbnail.create_thumbnail_if_none_in_package(context, siblings)

    def after_resource_delete(self, context, resources):
        """Create a thumbnail from the given resources if the package has none."""
        thumbnail.create_thumbnail_if_none_in_package(context, resources)

    # IConfigurer

    def update_config(self, config_):
        """Append ``<ckan.storage_path>/thumbnail`` to ``extra_public_paths``."""
        thumbnail_dir = os.path.join(config_.get('ckan.storage_path'), 'thumbnail')
        if not config_.get('extra_public_paths'):
            config_['extra_public_paths'] = thumbnail_dir
        else:
            config_['extra_public_paths'] += ',' + thumbnail_dir

    # IActions

    def get_actions(self):
        """Override ``package_delete`` and ``package_update`` with the thumbnail-aware versions."""
        overrides = {}
        overrides['package_delete'] = thumbnail_action.before_package_delete
        overrides['package_update'] = thumbnail_action.before_package_update
        return overrides

    # ITemplateHelpers

    def get_helpers(self):
        """Expose the thumbnail template helpers."""
        # NOTE: the second key is spelled 'thumbail_...' in the original and
        # is kept byte-for-byte because it is the name looked up at runtime.
        exposed = {}
        exposed['thumbnail_namespace'] = thumbnail_helpers.thumbnail_namespace
        exposed['thumbail_get_download_link'] = thumbnail_helpers.get_download_link_for_thumbnail
        return exposed
import os
import magic
from pdf2image import convert_from_bytes
import logging
from ckan.common import config
import urllib.request, urllib.error, urllib.parse
from binascii import b2a_hex
import ckan.plugins.toolkit as toolkit
log = logging.getLogger(__name__)
def create_thumbnail(context, resource):
    '''
    main entry point into this module
    this function is called from pdf_to_thumbnail.plugin

    Returns a tuple ``(is_PDF, filename)``. Only resources whose
    ``url_type`` is ``'upload'`` are considered; for every other resource
    the result is ``(False, None)``.
    '''
    # Looked up first, exactly as before, even if the resource turns out
    # not to be an upload.
    previous_filename = _get_filename_from_context(context)
    if resource.get('url_type') != 'upload':
        return False, None
    is_PDF, filename = _create_thumbnail_from_memory(resource, previous_filename)
    return is_PDF, filename
def _get_filename_from_context(context):
    '''
    Return the ``thumbnail`` value of the package referenced by
    ``context['package']``, read via ``package_show``.
    '''
    package_id = context.get('package').id
    show_package = toolkit.get_action('package_show')
    package_dict = show_package(None, {'id': package_id})
    return package_dict.get('thumbnail')
def _create_thumbnail_from_memory(resource, old_filename):
    '''
    Create a thumbnail from the stored file of an uploaded resource.

    Returns ``(True, new_filename)`` if the stored file is a PDF, otherwise
    ``(False, None)``. When a thumbnail was created and ``old_filename`` is
    set, the old thumbnail file is removed afterwards.
    '''
    filepath = get_resource_path(resource)
    is_PDF = _is_pdf(filepath)
    if not is_PDF:
        return is_PDF, None
    with open(filepath, 'rb') as file:
        new_filename = _create_thumbnail_from_file(file)
    if old_filename:
        # The stored thumbnail name already includes its extension (it comes
        # from ThumbnailPath.filename_with_extension), so it must be parsed
        # with from_filename_with_extension. from_filename would append
        # '.jpg' a second time and the old file would never be deleted.
        ThumbnailPath.from_filename_with_extension(old_filename).remove()
    return is_PDF, new_filename
def get_resource_path(resource):
    '''
    Build the filesystem path of an uploaded resource:
    ``<ckan.storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>``.
    '''
    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
    resource_id = resource.get('id')
    storage_root = config.get('ckan.storage_path')
    path_parts = (
        storage_root,
        'resources',
        resource_id[0:3],
        resource_id[3:6],
        resource_id[6:],
    )
    return os.path.join(*path_parts)
def _is_pdf(filepath):
    '''Return True if libmagic reports the mime type ``application/pdf`` for the file.'''
    return magic.from_file(filepath, mime=True) == 'application/pdf'
def _create_thumbnail_from_file(file):
    '''
    Render the PDF read from the open binary ``file`` into a single JPEG in
    the thumbnail folder and return the new filename including extension.

    The width is taken from ``ckan.thumbnail.size.width`` (default 410).
    '''
    target_width = config.get('ckan.thumbnail.size.width', 410)
    target = ThumbnailPath.from_unique_random_name()
    file.seek(0)
    pdf_bytes = file.read()
    # NOTE(review): the page range is passed as 0..0 exactly as before;
    # confirm against pdf2image's page indexing.
    render_options = {
        'size': (target_width, None),
        'output_folder': target.folder,
        'output_file': target.filename,
        'single_file': True,
        'first_page': 0,
        'last_page': 0,
        'fmt': 'jpg',
    }
    convert_from_bytes(pdf_bytes, **render_options)
    return target.filename_with_extension
def thumbnail_folder():
    '''Return the thumbnail directory, ``<ckan.storage_path>/thumbnail``.'''
    storage_root = config.get('ckan.storage_path')
    return os.path.join(storage_root, 'thumbnail')
def rename_thumbnail_to_random_name(old_filename):
    '''
    used by pdf_to_thumbnail.action

    Rename the thumbnail file ``old_filename`` (name including extension)
    to a new, unused random name inside the thumbnail folder.

    Returns the new filename including extension, or ``None`` if the rename
    failed with an ``OSError`` (a warning is logged in that case).
    '''
    old_filepath = ThumbnailPath.from_filename_with_extension(old_filename)
    new_filepath = ThumbnailPath.from_unique_random_name()
    try:
        os.renames(old_filepath.full_filename, new_filepath.full_filename)
    except OSError:
        # Log the actual path; formatting the ThumbnailPath object itself
        # would only print its default object representation.
        log.warning('The file path "{}" of package was not found.'.format(old_filepath.full_filename))
        return None
    return new_filepath.filename_with_extension
def remove_thumbnail(context):
    '''
    used by pdf_to_thumbnail.action

    Delete the thumbnail file of the package referenced by the context,
    if the package has a thumbnail.
    '''
    stored_name = _get_filename_from_context(context)
    if not stored_name:
        return
    ThumbnailPath.from_filename_with_extension(stored_name).remove()
def resources_of_containing_package(resource):
    #todo: change arg order
    '''
    used by pdf_to_thumbnail.plugin

    Return the ``resources`` list of the package the given resource
    belongs to, read via ``package_show``.
    '''
    show_package = toolkit.get_action('package_show')
    containing_package = show_package(None, {'id': resource.get('package_id')})
    return containing_package.get('resources')
def create_thumbnail_if_none_in_package(context, resources):
    '''
    used by pdf_to_thumbnail.plugin
    Walks through the given resources in order and tries to create a
    thumbnail from each one, stopping at the first success.
    Nothing is done if the package already has a thumbnail.
    '''
    package_dict = _get_package_dict_from_context(context)
    if _has_thumbnail(package_dict):
        return
    for candidate in resources:
        if _try_create_thumbnail(context, candidate):
            break
def _get_package_dict_from_context(context):
    '''Return the ``package_show`` dict of the package stored in ``context['package']``.'''
    show_package = toolkit.get_action('package_show')
    return show_package(None, {'id': context.get('package').id})
def _has_thumbnail(package_dict):
thumbnail = package_dict.get('thumbnail')
return bool(thumbnail)
def _try_create_thumbnail(context, resource):
    '''
    Try to create a thumbnail from ``resource``; on success store its
    filename in the package. Returns whether the resource was a PDF.
    '''
    is_PDF, thumbnail_name = create_thumbnail(context, resource)
    if is_PDF:
        _write_thumbnail_into_package(context, thumbnail_name)
    return is_PDF
def _write_thumbnail_into_package(context, filename):
    '''
    Store ``filename`` as the ``thumbnail`` of the context's package and
    save the package through ``package_update``.
    '''
    # NOTE(review): the original nesting is not recoverable from this view;
    # package_update is assumed to run regardless of ``filename`` — confirm.
    package_dict = _get_package_dict_from_context(context)
    if filename:
        package_dict['thumbnail'] = filename
    update_package = toolkit.get_action('package_update')
    update_package(None, package_dict)
class ThumbnailPath(object):
    '''
    utility class to manage the path of thumbnail pictures

    A path consists of three parts: ``folder``, ``filename`` (without
    extension) and ``extension`` (including the leading dot, or '').
    '''

    _EXTENSION = '.jpg'
    _RANDOM_PREFIX = 'thumbnail_picture_'

    def __init__(self, folder, filename, extension):
        self.folder = folder
        self.filename = filename
        self.extension = extension

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            type(self).__name__, self.folder, self.filename, self.extension
        )

    @staticmethod
    def from_filename(filename):
        '''
        filename without extension (i.e. '.jpg'); the default extension
        is appended.
        '''
        return ThumbnailPath(thumbnail_folder(), filename, ThumbnailPath._EXTENSION)

    @staticmethod
    def from_filename_with_extension(filename_with_extension):
        '''
        Split at the last dot: everything before it is the filename, the
        dot and everything after it the extension. A name without any dot
        gets an empty extension.
        '''
        stem, dot, suffix = filename_with_extension.rpartition('.')
        if not dot:
            return ThumbnailPath(thumbnail_folder(), filename_with_extension, '')
        return ThumbnailPath(thumbnail_folder(), stem, dot + suffix)

    @staticmethod
    def from_unique_random_name():
        '''Return a random thumbnail path that does not exist on disk yet.'''
        # Loop instead of recursing so repeated collisions cannot grow the
        # call stack.
        while True:
            thumbnail_path = ThumbnailPath._from_random_name()
            if not thumbnail_path.exists():
                return thumbnail_path

    @staticmethod
    def _random_stem():
        '''Return 'thumbnail_picture_' followed by 30 random hex digits.'''
        # bytes.hex() returns text. str(b2a_hex(...)) on Python 3 returned
        # the bytes repr, which put "b'" and a closing quote into filenames.
        return ThumbnailPath._RANDOM_PREFIX + os.urandom(15).hex()

    @staticmethod
    def _from_random_name():
        return ThumbnailPath.from_filename(ThumbnailPath._random_stem())

    @property
    def filename_with_extension(self):
        return self.filename + self.extension

    @property
    def full_filename(self):
        return os.path.join(self.folder, self.filename_with_extension)

    def exists(self):
        return os.path.exists(self.full_filename)

    def remove(self):
        '''Delete the file if it exists; do nothing otherwise.'''
        if os.path.exists(self.full_filename):
            os.remove(self.full_filename)
......@@ -168,10 +168,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
tk.get_validator('ignore_missing'),
tk.get_converter('convert_to_extras')
],
'thumbnail': [
tk.get_validator('ignore_missing'),
tk.get_converter('convert_to_extras')
],
'relatedPackage': [
tk.get_validator('validate_relatedPackage'),
tk.get_converter('convert_to_extras')
......@@ -197,10 +193,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing')
],
'thumbnail': [
tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing')
],
'relatedPackage': [
tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing')
......@@ -279,13 +271,17 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
self._update_is_new_in_pkg_dict(pkg_dict)
return pkg_dict
def after_dataset_create(self, context, resource):
if resource.get('package_id'):
tools.add_attributes_resources(context, resource)
def after_resource_create(self, context, resource):
log.debug('after_resource_create')
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
def after_resource_update(self, context, resource):
log.debug('after_resource_update')
def after_dataset_update(self, context, resource):
if resource.get('package_id'):
tools.add_attributes_resources(context, resource)
if resource.get('url_type') == 'upload':
tools.add_resource_attributes(context, resource)
@staticmethod
def _update_is_new_in_pkg_dict(pkg_dict):
......
......@@ -29,7 +29,6 @@ Example:
{% set daterange = h.get_daterange_prettified(package) %}
{% set language_of_package = h.get_language_of_package(package) %}
{% set language_icon = h.get_language_icon(package) %}
{% set thumbnail = package.get('thumbnail') %}
{% block package_item %}
<div class="odsh-dataset-item">
......
import os
from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
import logging
from ckanext.odsh.lib.uploader import calculate_hash
from ckan.common import config
import ckan.plugins.toolkit as toolkit
#import magic
#import pdftotext
def add_attributes_resources(context, resource):
    """
    Fill in size, hash and hash algorithm for ``resource`` inside its package
    and save the package with ``package_update``.

    The package is loaded via ``package_show`` and the entry with the same
    ``id`` as ``resource`` is looked up in its resources. If the stored file
    exists, ``size`` is set when missing, ``hash`` is recalculated and
    ``hash_algorithm`` is set.
    """
    package_id = resource.get('package_id')
    package = toolkit.get_action('package_show')(context, {'id': package_id})
    resources = package.get('resources')
    for index, entry in enumerate(resources):
        if entry.get('id') != resource.get('id'):
            continue
        path = get_resource_path(resource)
        if os.path.exists(path):
            with open(path, 'rb') as file:
                # size: only filled in when not already present
                if not entry.get('size'):
                    entry.update({'size': os.path.getsize(path)})
                # hash: always recalculated from the start of the file
                file.seek(0)
                file_hash = calculate_hash(file)
                entry.update({'hash': file_hash})
                # hash algorithm
                entry.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})
                # page counting was commented out in the original and stays disabled
        resources[index] = entry
        break
    package.update({'resources': resources})
    toolkit.get_action('package_update')(context, package)
log = logging.getLogger(__name__)
def add_resource_attributes(context, resource):
    """
    Set size, hash and hash algorithm on an uploaded resource and save it
    with ``resource_update``.

    A ``resource_processed`` flag in ``context`` ensures this runs only once
    per context. Resources without ``package_id`` are ignored. A missing
    file aborts with 404; any exception while processing aborts with 500.
    """
    log.debug("add_resource_attributes")

    # Already handled in this cycle: nothing to do.
    if context.get('resource_processed', False):
        log.debug("Resource already processed for this cycle, skipping.")
        return

    if not resource.get('package_id', False):
        return

    # The flag lives in the context, not on the resource.
    context['resource_processed'] = True

    path = _get_resource_path(resource)

    if not os.path.isfile(path):
        toolkit.abort(404, f"File not found: {path}")
        return

    try:
        with open(path, 'rb') as file:
            # Size is only filled in when not already present.
            if not resource.get('size'):
                resource.update({'size': os.path.getsize(path)})

            # Hash is always recalculated from the start of the file.
            file.seek(0)
            file_hash = calculate_hash(file)
            resource.update({'hash': file_hash})

        resource.update({'hash_algorithm': 'http://dcat-ap.de/def/hashAlgorithms/md/5'})

        update_resource = toolkit.get_action('resource_update')
        update_resource(context, resource)
    except Exception as e:
        # Covers failures while reading, hashing or updating the resource.
        toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
def _get_resource_path(resource):
    """
    Build the filesystem path of an uploaded resource:
    ``<ckan.storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>``.
    """
    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
    resource_id = resource.get('id')
    storage_root = config.get('ckan.storage_path')
    path_parts = (
        storage_root,
        'resources',
        resource_id[0:3],
        resource_id[3:6],
        resource_id[6:],
    )
    return os.path.join(*path_parts)
\ No newline at end of file
......@@ -86,7 +86,6 @@ setup(
odsh_autocomplete=ckanext.odsh.plugin_odsh_autocomplete:OdshAutocompletePlugin
odsh_dcat_harvest=ckanext.odsh.plugin_odsh_dcat_harvest:OdshDCATHarvestPlugin
odsh_collections=ckanext.odsh.collection.plugin:CollectionsPlugin
thumbnail=ckanext.odsh.pdf_to_thumbnail.plugin:ThumbnailPlugin
[paste.paster_command]
odsh_initialization = ckanext.odsh.commands.initialization:Initialization
......