Skip to content
Snippets Groups Projects
Verified Commit fd3ba4ef authored by Thorge Petersen's avatar Thorge Petersen
Browse files

remove: eliminate legacy thumbnail creation plugin and associated code

parent 2e7c7f0d
Branches
Tags
1 merge request!55Resolve "File Hash Not Recalculated During Resource Update"
...@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ...@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Corrected handling of file hash and size updates to ensure they are properly calculated during resource updates and added enhanced error handling for missing files or processing failures during file attribute updates. - Corrected handling of file hash and size updates to ensure they are properly calculated during resource updates and added enhanced error handling for missing files or processing failures during file attribute updates.
### Removed
- Completely removed the legacy plugin responsible for creating thumbnails, including all associated code and functionality.
## [2.4.7] - 2025-01-20 ## [2.4.7] - 2025-01-20
### Added ### Added
......
# ckan
import ckan.plugins.toolkit as toolkit
import ckan.lib.helpers as helpers
from ckan.logic.action.update import package_update
from ckan.logic.action.delete import package_delete
from . import thumbnail
def before_package_delete(context, package_id_dict):
    '''
    Action wrapper around CKAN's package_delete.

    Loads the dataset, and if the current user passes the
    'package_delete' access check, removes the dataset's thumbnail
    file first. The stock package_delete action is called in every
    case and its result is returned.
    '''
    show_package = toolkit.get_action('package_show')
    pkg_dict = show_package(context, package_id_dict)
    may_delete = helpers.check_access('package_delete', pkg_dict)
    if may_delete:
        thumbnail.remove_thumbnail(context)
    return package_delete(context, package_id_dict)
def before_package_update(context, pkg_dict):
    '''
    Action wrapper around CKAN's package_update that keeps the
    dataset's thumbnail extra intact across updates.

    If the user passes the 'package_update' access check and the stored
    dataset already has a thumbnail:

    * when the 'private' flag differs between the stored and the
      incoming dataset, the thumbnail file is renamed to a new random
      name and that name is written into the incoming extras;
    * otherwise, when the incoming dict carries no 'thumbnail', the
      stored file name is copied into the incoming extras.

    The stock package_update action is called in every case and its
    result is returned.
    '''
    if helpers.check_access('package_update', pkg_dict):
        package_id = pkg_dict.get('id')
        package = toolkit.get_action('package_show')(context, {'id': package_id})
        old_filename = package.get('thumbnail')
        if old_filename:
            # The flags are compared as strings because they may arrive
            # as booleans or as their string form.
            visibility_changed = (
                str(package.get('private')) != str(pkg_dict.get('private'))
            )
            new_filename = None
            if visibility_changed:
                # rename_thumbnail_to_random_name returns None when the
                # file could not be renamed; keep the stored name then
                # instead of writing a None value into the extras.
                new_filename = (
                    thumbnail.rename_thumbnail_to_random_name(old_filename)
                    or old_filename
                )
            elif not pkg_dict.get('thumbnail'):
                new_filename = old_filename
            if new_filename:
                # The incoming dict is not guaranteed to carry an
                # 'extras' list; create one rather than failing.
                extras = pkg_dict.get('extras')
                if extras is None:
                    extras = pkg_dict['extras'] = []
                extras.append({'key': 'thumbnail', 'value': new_filename})
    return package_update(context, pkg_dict)
\ No newline at end of file
from ckan.lib.helpers import is_url, url_for
def thumbnail_namespace(filename):
    '''
    Template helper: returns the given thumbnail file name prefixed
    with a single slash.
    '''
    return "".join(("/", filename))
def get_download_link_for_thumbnail(package):
    '''
    Template helper: returns the download URL of the package's PDF.

    The resources are scanned from last to first; the first one that
    was uploaded (url_type 'upload') with mimetype 'application/pdf'
    wins. If its 'url' is already a URL it is returned unchanged,
    otherwise a fully qualified 'dataset.resource_download' URL is
    built for it.

    Returns None when the package has no resources or none of them is
    an uploaded PDF.
    '''
    # A package dict without a 'resources' entry (or with None) is
    # treated like an empty resource list instead of raising.
    resources = package.get('resources') or []
    for resource in reversed(resources):
        if resource.get('url_type') != 'upload':
            continue
        if resource.get('mimetype') != 'application/pdf':
            continue
        resource_url = resource.get('url')
        if is_url(resource_url):
            return resource_url
        return url_for(
            named_route='dataset.resource_download',
            id=resource.get('package_id'),
            resource_id=resource.get('id'),
            filename=resource_url,
            qualified=True,
        )
    return None
import os
#from ckan
import ckan.plugins as plugins
#pdf_to_thumbnail
from . import thumbnail
from . import action as thumbnail_action
from . import helpers as thumbnail_helpers
import logging
log = logging.getLogger(__name__)
class ThumbnailPlugin(plugins.SingletonPlugin):
    '''
    CKAN plugin that wires the thumbnail module into resource
    life-cycle hooks, the public file paths, the package actions and
    the template helpers.
    '''
    plugins.implements(plugins.IResourceController, inherit=True)
    plugins.implements(plugins.IConfigurer, inherit=True)
    plugins.implements(plugins.IActions, inherit=True)
    plugins.implements(plugins.ITemplateHelpers)

    # IResourceController

    def after_resource_create(self, context, resource):
        '''
        Tries to create a thumbnail for the resource's package if the
        package has none yet.
        '''
        thumbnail.create_thumbnail_if_none_in_package(
            context,
            thumbnail.resources_of_containing_package(resource),
        )

    def after_resource_update(self, context, resource):
        '''
        Same handling as after_resource_create.
        '''
        thumbnail.create_thumbnail_if_none_in_package(
            context,
            thumbnail.resources_of_containing_package(resource),
        )

    def after_resource_delete(self, context, resources):
        '''
        Tries to create a thumbnail from the resources passed in by
        the hook if the package has none yet.
        '''
        thumbnail.create_thumbnail_if_none_in_package(context, resources)

    # IConfigurer

    def update_config(self, config_):
        '''
        Appends <ckan.storage_path>/thumbnail to the comma separated
        'extra_public_paths' setting.
        '''
        thumbnail_dir = os.path.join(
            config_.get('ckan.storage_path'),
            'thumbnail',
        )
        if not config_.get('extra_public_paths'):
            config_['extra_public_paths'] = thumbnail_dir
        else:
            config_['extra_public_paths'] += ',' + thumbnail_dir

    # IActions

    def get_actions(self):
        '''
        Registers the thumbnail-aware wrappers under the names of the
        package_delete and package_update actions.
        '''
        actions = {}
        actions['package_delete'] = thumbnail_action.before_package_delete
        actions['package_update'] = thumbnail_action.before_package_update
        return actions

    # ITemplateHelpers

    def get_helpers(self):
        '''
        Returns the template helpers of this plugin, keyed by the name
        they are registered under.
        '''
        helper_functions = {}
        helper_functions['thumbnail_namespace'] = (
            thumbnail_helpers.thumbnail_namespace
        )
        # The key below is spelled 'thumbail' (sic); it is the
        # registered helper name and is kept unchanged.
        helper_functions['thumbail_get_download_link'] = (
            thumbnail_helpers.get_download_link_for_thumbnail
        )
        return helper_functions
import os
import magic
from pdf2image import convert_from_bytes
import logging
from ckan.common import config
import urllib.request, urllib.error, urllib.parse
from binascii import b2a_hex
import ckan.plugins.toolkit as toolkit
log = logging.getLogger(__name__)
def create_thumbnail(context, resource):
    '''
    main entry point into this module
    this function is called from pdf_to_thumbnail.plugin

    Returns a tuple (is_PDF, filename). Only resources with url_type
    'upload' are inspected; every other resource yields (False, None).
    '''
    # The current thumbnail name is looked up first, regardless of the
    # resource type.
    previous_filename = _get_filename_from_context(context)
    if resource.get('url_type') != 'upload':
        return False, None
    is_PDF, filename = _create_thumbnail_from_memory(resource, previous_filename)
    return is_PDF, filename
def _get_filename_from_context(context):
    '''
    Returns the 'thumbnail' value of the package referenced by
    context['package'], as reported by the package_show action.
    '''
    package_id = context.get('package').id
    package_dict = toolkit.get_action('package_show')(None, {'id': package_id})
    return package_dict.get('thumbnail')
def _create_thumbnail_from_memory(resource, old_filename):
    '''
    Creates a thumbnail from the stored file of an uploaded resource.

    Returns (True, new_filename) when the stored file is a PDF and
    (False, None) otherwise. After the new thumbnail has been written,
    the file named by old_filename (if any) is removed.
    '''
    filepath = get_resource_path(resource)
    if not _is_pdf(filepath):
        return False, None
    with open(filepath, 'rb') as pdf_file:
        new_filename = _create_thumbnail_from_file(pdf_file)
    if old_filename:
        # old_filename is the name stored in the package, which already
        # includes the extension (see _create_thumbnail_from_file), so
        # it must not get '.jpg' appended a second time.
        ThumbnailPath.from_filename_with_extension(old_filename).remove()
    return True, new_filename
def get_resource_path(resource):
    '''
    Returns the path of the resource's uploaded file below
    ckan.storage_path: resources/<id[0:3]>/<id[3:6]>/<id[6:]>.
    '''
    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
    resource_id = resource.get('id')
    storage_root = config.get('ckan.storage_path')
    path_parts = (
        'resources',
        resource_id[0:3],
        resource_id[3:6],
        resource_id[6:],
    )
    return os.path.join(storage_root, *path_parts)
def _is_pdf(filepath):
    '''
    Tells whether libmagic reports the mime type 'application/pdf'
    for the file at filepath.
    '''
    return magic.from_file(filepath, mime=True) == 'application/pdf'
def _create_thumbnail_from_file(file):
    '''
    Converts the PDF held by the open binary file object into a jpg
    picture inside the thumbnail folder and returns the picture's file
    name including its extension.

    The width is read from 'ckan.thumbnail.size.width' (default 410);
    no height is passed.
    '''
    width = config.get('ckan.thumbnail.size.width', 410)
    target = ThumbnailPath.from_unique_random_name()
    # Rewind so the whole document is read even if the caller already
    # consumed part of the file.
    file.seek(0)
    # NOTE(review): first_page/last_page are passed as 0 together with
    # single_file=True - confirm against the pdf2image documentation
    # that this selects the first page.
    convert_from_bytes(
        file.read(),
        size=(width, None),
        output_folder=target.folder,
        output_file=target.filename,
        single_file=True,
        first_page=0,
        last_page=0,
        fmt='jpg'
    )
    return target.filename_with_extension
def thumbnail_folder():
    '''
    Returns the folder holding the thumbnail pictures:
    <ckan.storage_path>/thumbnail
    '''
    storage_root = config.get('ckan.storage_path')
    return os.path.join(storage_root, 'thumbnail')
def rename_thumbnail_to_random_name(old_filename):
    '''
    used by pdf_to_thumbnail.action

    Moves the thumbnail named old_filename (file name including its
    extension) to a new, unused random name inside the thumbnail
    folder.

    Returns the new file name including its extension, or None when
    the rename failed with an OSError (a warning is logged then).
    '''
    old_filepath = ThumbnailPath.from_filename_with_extension(old_filename)
    new_filepath = ThumbnailPath.from_unique_random_name()
    try:
        os.renames(old_filepath.full_filename, new_filepath.full_filename)
    except OSError:
        # Log the actual path; formatting the ThumbnailPath object
        # itself would only print its object representation.
        log.warning(
            'The file path "{}" of package was not found.'.format(
                old_filepath.full_filename
            )
        )
        return None
    return new_filepath.filename_with_extension
def remove_thumbnail(context):
    '''
    used by pdf_to_thumbnail.action

    Removes the thumbnail file of the package referenced by the
    context; does nothing when the package has no thumbnail.
    '''
    stored_filename = _get_filename_from_context(context)
    if not stored_filename:
        return
    ThumbnailPath.from_filename_with_extension(stored_filename).remove()
def resources_of_containing_package(resource):
    # TODO: change arg order
    '''
    used by pdf_to_thumbnail.plugin

    Returns the 'resources' entry of the package the given resource
    belongs to, as reported by the package_show action.
    '''
    show_package = toolkit.get_action('package_show')
    package_dict = show_package(None, {'id': resource.get('package_id')})
    return package_dict.get('resources')
def create_thumbnail_if_none_in_package(context, resources):
    '''
    used by pdf_to_thumbnail.plugin

    Walks through the given resources in order and tries to create a
    thumbnail from each one, stopping at the first success.
    Nothing is done when the package already has a thumbnail.
    '''
    if _has_thumbnail(_get_package_dict_from_context(context)):
        return
    for resource in resources:
        if _try_create_thumbnail(context, resource):
            # one thumbnail per package is enough
            break
def _get_package_dict_from_context(context):
    '''
    Returns the package_show result for the package object stored
    under context['package'].
    '''
    show_package = toolkit.get_action('package_show')
    return show_package(None, {'id': context.get('package').id})
def _has_thumbnail(package_dict):
thumbnail = package_dict.get('thumbnail')
return bool(thumbnail)
def _try_create_thumbnail(context, resource):
    '''
    Attempts to create a thumbnail from the resource. When the
    resource turned out to be a PDF, the new file name is written into
    the package. Returns whether the resource was a PDF.
    '''
    is_PDF, filename = create_thumbnail(context, resource)
    if is_PDF:
        _write_thumbnail_into_package(context, filename)
    return is_PDF
def _write_thumbnail_into_package(context, filename):
    '''
    Stores filename as the 'thumbnail' of the package referenced by
    the context, using the package_update action. An empty filename
    leaves the package untouched.
    '''
    package_dict = _get_package_dict_from_context(context)
    if not filename:
        return
    package_dict['thumbnail'] = filename
    toolkit.get_action('package_update')(None, package_dict)
class ThumbnailPath(object):
    '''
    utility class to manage the path of thumbnail pictures

    A path consists of three parts: the folder, the file name without
    extension and the extension including its leading dot.
    '''

    # extension given to every newly created thumbnail
    _EXTENSION = '.jpg'

    def __init__(self, folder, filename, extension):
        self.folder = folder
        self.filename = filename
        self.extension = extension

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            type(self).__name__, self.folder, self.filename, self.extension
        )

    @staticmethod
    def from_filename(filename):
        '''
        filename without extension (i.e. '.jpg')
        '''
        return ThumbnailPath(thumbnail_folder(), filename, ThumbnailPath._EXTENSION)

    @staticmethod
    def from_filename_with_extension(filename_with_extension):
        '''
        Splits the name at its last dot: everything before it becomes
        the file name, the dot and the rest become the extension.
        A name without any dot gets an empty extension.
        '''
        stem, dot, suffix = filename_with_extension.rpartition('.')
        if not dot:
            return ThumbnailPath(thumbnail_folder(), filename_with_extension, '')
        return ThumbnailPath(thumbnail_folder(), stem, dot + suffix)

    @staticmethod
    def from_unique_random_name():
        '''
        Returns a path with a random file name that does not exist in
        the thumbnail folder yet.
        '''
        # Retry in a loop (not recursively) until an unused name is hit.
        while True:
            candidate = ThumbnailPath._from_random_name()
            if not candidate.exists():
                return candidate

    @staticmethod
    def _random_filename():
        '''
        Returns 'thumbnail_picture_' followed by 30 random hex digits.
        '''
        # b2a_hex returns bytes; decode them so that the name does not
        # end up as the repr of a bytes object ("b'...'").
        hex_digits = b2a_hex(os.urandom(15)).decode('ascii')
        return 'thumbnail_picture_' + hex_digits

    @staticmethod
    def _from_random_name():
        return ThumbnailPath.from_filename(ThumbnailPath._random_filename())

    @property
    def filename_with_extension(self):
        return self.filename + self.extension

    @property
    def full_filename(self):
        return os.path.join(self.folder, self.filename_with_extension)

    def exists(self):
        return os.path.exists(self.full_filename)

    def remove(self):
        '''
        Deletes the file; a file that does not exist is not an error.
        '''
        # Attempt the removal directly instead of checking first, so a
        # file vanishing in between cannot raise.
        try:
            os.remove(self.full_filename)
        except FileNotFoundError:
            pass
...@@ -168,10 +168,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm): ...@@ -168,10 +168,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
tk.get_validator('ignore_missing'), tk.get_validator('ignore_missing'),
tk.get_converter('convert_to_extras') tk.get_converter('convert_to_extras')
], ],
'thumbnail': [
tk.get_validator('ignore_missing'),
tk.get_converter('convert_to_extras')
],
'relatedPackage': [ 'relatedPackage': [
tk.get_validator('validate_relatedPackage'), tk.get_validator('validate_relatedPackage'),
tk.get_converter('convert_to_extras') tk.get_converter('convert_to_extras')
...@@ -197,10 +193,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm): ...@@ -197,10 +193,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
tk.get_converter('convert_from_extras'), tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing') tk.get_validator('ignore_missing')
], ],
'thumbnail': [
tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing')
],
'relatedPackage': [ 'relatedPackage': [
tk.get_converter('convert_from_extras'), tk.get_converter('convert_from_extras'),
tk.get_validator('ignore_missing') tk.get_validator('ignore_missing')
......
...@@ -29,7 +29,6 @@ Example: ...@@ -29,7 +29,6 @@ Example:
{% set daterange = h.get_daterange_prettified(package) %} {% set daterange = h.get_daterange_prettified(package) %}
{% set language_of_package = h.get_language_of_package(package) %} {% set language_of_package = h.get_language_of_package(package) %}
{% set language_icon = h.get_language_icon(package) %} {% set language_icon = h.get_language_icon(package) %}
{% set thumbnail = package.get('thumbnail') %}
{% block package_item %} {% block package_item %}
<div class="odsh-dataset-item"> <div class="odsh-dataset-item">
......
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
import logging import logging
from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
from ckanext.odsh.lib.uploader import calculate_hash from ckanext.odsh.lib.uploader import calculate_hash
from ckan.common import config
import ckan.plugins.toolkit as toolkit import ckan.plugins.toolkit as toolkit
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -21,7 +22,7 @@ def add_resource_attributes(context, resource): ...@@ -21,7 +22,7 @@ def add_resource_attributes(context, resource):
# Mark the resource as processed for this cycle (in context, not on the resource) # Mark the resource as processed for this cycle (in context, not on the resource)
context['resource_processed'] = True context['resource_processed'] = True
path = get_resource_path(resource) path = _get_resource_path(resource)
# Check if the path exists and is a file # Check if the path exists and is a file
if os.path.isfile(path): if os.path.isfile(path):
...@@ -48,3 +49,15 @@ def add_resource_attributes(context, resource): ...@@ -48,3 +49,15 @@ def add_resource_attributes(context, resource):
toolkit.abort(500, f"Error processing resource at {path}: {str(e)}") toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
else: else:
toolkit.abort(404, f"File not found: {path}") toolkit.abort(404, f"File not found: {path}")
def _get_resource_path(resource):
    '''
    Returns the path of the resource's uploaded file below
    ckan.storage_path: resources/<id[0:3]>/<id[3:6]>/<id[6:]>.
    '''
    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
    resource_id = resource.get('id')
    storage_root = config.get('ckan.storage_path')
    first_level = resource_id[0:3]
    second_level = resource_id[3:6]
    file_name = resource_id[6:]
    return os.path.join(
        storage_root, 'resources', first_level, second_level, file_name
    )
\ No newline at end of file
...@@ -86,7 +86,6 @@ setup( ...@@ -86,7 +86,6 @@ setup(
odsh_autocomplete=ckanext.odsh.plugin_odsh_autocomplete:OdshAutocompletePlugin odsh_autocomplete=ckanext.odsh.plugin_odsh_autocomplete:OdshAutocompletePlugin
odsh_dcat_harvest=ckanext.odsh.plugin_odsh_dcat_harvest:OdshDCATHarvestPlugin odsh_dcat_harvest=ckanext.odsh.plugin_odsh_dcat_harvest:OdshDCATHarvestPlugin
odsh_collections=ckanext.odsh.collection.plugin:CollectionsPlugin odsh_collections=ckanext.odsh.collection.plugin:CollectionsPlugin
thumbnail=ckanext.odsh.pdf_to_thumbnail.plugin:ThumbnailPlugin
[paste.paster_command] [paste.paster_command]
odsh_initialization = ckanext.odsh.commands.initialization:Initialization odsh_initialization = ckanext.odsh.commands.initialization:Initialization
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment