From fd3ba4efd9710a8f52838feed9c184eb1a6a94d4 Mon Sep 17 00:00:00 2001
From: Thorge Petersen <petersen@rz.uni-kiel.de>
Date: Mon, 27 Jan 2025 12:54:30 +0100
Subject: [PATCH] remove: eliminate legacy thumbnail creation plugin and
 associated code

---
 CHANGELOG.md                                  |   4 +
 ckanext/odsh/pdf_to_thumbnail/__init__.py     |   0
 ckanext/odsh/pdf_to_thumbnail/action.py       |  29 ---
 ckanext/odsh/pdf_to_thumbnail/helpers.py      |  26 --
 ckanext/odsh/pdf_to_thumbnail/plugin.py       |  58 -----
 ckanext/odsh/pdf_to_thumbnail/thumbnail.py    | 223 ------------------
 ckanext/odsh/plugin.py                        |   8 -
 .../odsh/templates/snippets/package_item.html |   1 -
 ckanext/odsh/tools.py                         |  15 +-
 setup.py                                      |   1 -
 10 files changed, 18 insertions(+), 347 deletions(-)
 delete mode 100644 ckanext/odsh/pdf_to_thumbnail/__init__.py
 delete mode 100644 ckanext/odsh/pdf_to_thumbnail/action.py
 delete mode 100644 ckanext/odsh/pdf_to_thumbnail/helpers.py
 delete mode 100644 ckanext/odsh/pdf_to_thumbnail/plugin.py
 delete mode 100644 ckanext/odsh/pdf_to_thumbnail/thumbnail.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03c4b99d..eeeab63a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Corrected handling of file hash and size updates to ensure they are properly calculated during resource updates and added enhanced error handling for missing files or processing failures during file attribute updates.
 
+### Removed
+
+- Completely removed the legacy plugin responsible for creating thumbnails, including all associated code and functionality.
+
 ## [2.4.7] - 2025-01-20
 
 ### Added
diff --git a/ckanext/odsh/pdf_to_thumbnail/__init__.py b/ckanext/odsh/pdf_to_thumbnail/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/ckanext/odsh/pdf_to_thumbnail/action.py b/ckanext/odsh/pdf_to_thumbnail/action.py
deleted file mode 100644
index d4a19d19..00000000
--- a/ckanext/odsh/pdf_to_thumbnail/action.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# ckan
-import ckan.plugins.toolkit as toolkit
-import ckan.lib.helpers as helpers
-from ckan.logic.action.update import package_update
-from ckan.logic.action.delete import package_delete
-
-from . import thumbnail
-
-
-def before_package_delete(context, package_id_dict):
-    pkg_dict = toolkit.get_action('package_show')(context, package_id_dict)
-    if helpers.check_access('package_delete', pkg_dict):
-        thumbnail.remove_thumbnail(context)
-    return package_delete(context, package_id_dict)
-    
-def before_package_update(context, pkg_dict):
-    if helpers.check_access('package_update', pkg_dict):
-        package_id =pkg_dict.get('id') 
-        package = toolkit.get_action('package_show')(context, {'id': package_id})
-        old_private = package.get('private')
-        new_private = pkg_dict.get('private')
-        old_filename = package.get('thumbnail')
-        if old_filename:
-            if str(old_private) != str(new_private):
-                new_filename = thumbnail.rename_thumbnail_to_random_name(old_filename)
-                pkg_dict['extras'].append({'key': 'thumbnail', 'value': new_filename})
-            elif not pkg_dict.get('thumbnail'): 
-                pkg_dict['extras'].append({'key': 'thumbnail', 'value': old_filename})
-    return package_update(context, pkg_dict)
\ No newline at end of file
diff --git a/ckanext/odsh/pdf_to_thumbnail/helpers.py b/ckanext/odsh/pdf_to_thumbnail/helpers.py
deleted file mode 100644
index 82a5d15c..00000000
--- a/ckanext/odsh/pdf_to_thumbnail/helpers.py
+++ /dev/null
@@ -1,26 +0,0 @@
-
-from ckan.lib.helpers import is_url, url_for
-
-def thumbnail_namespace(filename):
-    return "/" + filename
-
-def get_download_link_for_thumbnail(package):
-    resources = package.get('resources')
-    for resource in resources[::-1]:
-        url_type =resource.get('url_type')
-        mimetype = resource.get('mimetype')
-        if url_type == 'upload' and mimetype == 'application/pdf':
-            package_id = resource.get('package_id')
-            resource_id = resource.get('id')
-            pre_resource_url = resource.get('url')
-            if is_url(pre_resource_url):
-                url_resource = pre_resource_url
-            else:
-                url_resource = url_for(named_route='dataset.resource_download',
-                                    id=package_id,
-                                    resource_id=resource_id,
-                                    filename=pre_resource_url,
-                                    qualified = True)
-            
-            
-            return url_resource
diff --git a/ckanext/odsh/pdf_to_thumbnail/plugin.py b/ckanext/odsh/pdf_to_thumbnail/plugin.py
deleted file mode 100644
index f0c5f4c0..00000000
--- a/ckanext/odsh/pdf_to_thumbnail/plugin.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os 
-
-#from ckan
-import ckan.plugins as plugins
-
-#pdf_to_thumbnail 
-from . import thumbnail 
-from . import action as thumbnail_action
-from . import helpers as thumbnail_helpers
-
-import logging
-log = logging.getLogger(__name__)
-
-
-class ThumbnailPlugin(plugins.SingletonPlugin):
-    plugins.implements(plugins.IResourceController, inherit=True)
-    plugins.implements(plugins.IConfigurer, inherit=True)
-    plugins.implements(plugins.IActions, inherit=True)
-    plugins.implements(plugins.ITemplateHelpers)
-
-
-#IResourceController
-    def after_resource_create(self, context, resource):
-        resources = thumbnail.resources_of_containing_package(resource)
-        thumbnail.create_thumbnail_if_none_in_package(context, resources)
-        
-    def after_resource_update(self, context, resource):
-        resources = thumbnail.resources_of_containing_package(resource)
-        thumbnail.create_thumbnail_if_none_in_package(context, resources)
-                
-    def after_resource_delete(self, context, resources):
-        thumbnail.create_thumbnail_if_none_in_package(context, resources)
-            
-#IConfigurer 
-
-    def update_config(self, config_):
-        storage_path = config_.get('ckan.storage_path')
-        public_dir = os.path.join(storage_path, 'thumbnail')
-        if config_.get('extra_public_paths'):
-            config_['extra_public_paths'] += ',' + public_dir
-        else:
-            config_['extra_public_paths'] = public_dir
-
-#IActions
-
-    def get_actions(self):
-        return {'package_delete': thumbnail_action.before_package_delete,
-                'package_update': thumbnail_action.before_package_update 
-                }
-
-#ITemplateHelpers
-
-    def get_helpers(self):
-        
-        return {
-                'thumbnail_namespace':thumbnail_helpers.thumbnail_namespace,
-                'thumbail_get_download_link':thumbnail_helpers.get_download_link_for_thumbnail
-                }
diff --git a/ckanext/odsh/pdf_to_thumbnail/thumbnail.py b/ckanext/odsh/pdf_to_thumbnail/thumbnail.py
deleted file mode 100644
index 25067f73..00000000
--- a/ckanext/odsh/pdf_to_thumbnail/thumbnail.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import os
-import magic
-from pdf2image import convert_from_bytes
-import logging
-from ckan.common import config 
-import urllib.request, urllib.error, urllib.parse
-
-from binascii import b2a_hex
-import ckan.plugins.toolkit as toolkit
-
-
-log = logging.getLogger(__name__)
-
-
-def create_thumbnail(context, resource):
-    '''
-    main entry point into this module
-    this function is called from pdf_to_thumbnail.plugin
-    '''
-    old_filename = _get_filename_from_context(context)
-    url_type = resource.get('url_type')
-    if url_type == 'upload':
-        is_PDF, filename = _create_thumbnail_from_memory(resource, old_filename)
-    else:
-        is_PDF, filename = (False, None)
-    return is_PDF, filename  
-
-
-def _get_filename_from_context(context):
-    package = context.get('package')
-    package_id = package.id
-    package= toolkit.get_action('package_show')(None, {'id': package_id})
-    thumbnail = package.get('thumbnail') 
-    return  thumbnail
-
-
-def _create_thumbnail_from_memory(resource, old_filename):
-    filepath = get_resource_path(resource)
-    is_PDF = _is_pdf(filepath)
-    if is_PDF:
-        with open(filepath, 'rb') as file:
-            new_filename = _create_thumbnail_from_file(file)
-        if old_filename:
-            ThumbnailPath.from_filename(old_filename).remove()
-        return is_PDF, new_filename
-    else:
-        return is_PDF, None
-
-
-def get_resource_path(resource):
-    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
-    resource_id = resource.get('id')
-    filepath = os.path.join(
-        config.get('ckan.storage_path'),
-        'resources',
-        resource_id[0:3],
-        resource_id[3:6],
-        resource_id[6:]
-    )
-    return filepath
-
-
-def _is_pdf(filepath):
-    file_type = magic.from_file(filepath, mime = True)
-    return file_type == 'application/pdf'
-
-
-def _create_thumbnail_from_file(file):
-    width = config.get('ckan.thumbnail.size.width', 410)
-    new_thumbnail = ThumbnailPath.from_unique_random_name()
-    file.seek(0)
-    file_read = file.read()
-    convert_from_bytes(
-        file_read,
-        size=(width, None),
-        output_folder=new_thumbnail.folder,
-        output_file=new_thumbnail.filename,
-        single_file=True,
-        first_page=0,
-        last_page=0,
-        fmt='jpg'
-    )
-    return new_thumbnail.filename_with_extension
-
-
-def thumbnail_folder():
-    return os.path.join(
-        config.get('ckan.storage_path'),
-        'thumbnail',
-    )
-
-
-def rename_thumbnail_to_random_name(old_filename):
-    '''
-    used by pdf_to_thumbnail.action
-    '''
-    old_filepath = ThumbnailPath.from_filename_with_extension(old_filename)
-    new_filepath = ThumbnailPath.from_unique_random_name()
-    try:
-        os.renames(old_filepath.full_filename, new_filepath.full_filename)
-        return new_filepath.filename_with_extension
-    except OSError:
-        log.warning('The file path "{}"  of package was not found.'.format(old_filepath))
-     
-
-def remove_thumbnail(context):
-    '''
-    used by pdf_to_thumbnail.action
-    '''
-    old_filename = _get_filename_from_context(context)
-    if old_filename:
-        ThumbnailPath.from_filename_with_extension(old_filename).remove()
-
-
-def resources_of_containing_package(resource):
-    #todo: change arg order
-    '''
-    used by pdf_to_thumbnail.plugin
-    '''
-    package_id = resource.get('package_id')
-    package = toolkit.get_action('package_show')(None, {'id': package_id})
-    resources = package.get('resources')
-    return resources
-        
-
-def create_thumbnail_if_none_in_package(context, resources):
-    '''
-    used by pdf_to_thumbnail.plugin
-    loops through a package's resources in the order they have been uploaded
-    and for each tries to create a thumbnail until it succeeds.
-    If the package already has a thumbnail the creation step is skipped
-    '''
-    package_dict = _get_package_dict_from_context(context)
-    if not _has_thumbnail(package_dict):
-        any(_try_create_thumbnail(context, r) for r in resources)
-
-
-def _get_package_dict_from_context(context):
-    package_id = context.get('package').id
-    package_dict = toolkit.get_action('package_show')(None, {'id': package_id})
-    return package_dict
-
-
-def _has_thumbnail(package_dict):
-    thumbnail = package_dict.get('thumbnail')
-    return bool(thumbnail)
-
-
-def _try_create_thumbnail(context, resource):
-    is_PDF, filename = create_thumbnail(context, resource)
-    success = is_PDF
-    if success:
-        _write_thumbnail_into_package(context, filename)
-    return success
-
-
-def _write_thumbnail_into_package(context, filename):
-    package_dict = _get_package_dict_from_context(context)
-    if filename:
-        package_dict.update({'thumbnail': filename})
-    toolkit.get_action('package_update')(None, package_dict)
-    
-
-class ThumbnailPath(object):
-    '''
-    utility class to manage the path of thumbnail pictures
-    '''
-
-    def __init__(self, folder, filename, extension):
-        self.folder = folder
-        self.filename = filename
-        self.extension = extension
-    
-    _EXTENSION = '.jpg'
-    
-    @staticmethod
-    def from_filename(filename):
-        '''
-        filename without extension (i.e. '.jpg')
-        '''
-        return ThumbnailPath(thumbnail_folder(), filename, ThumbnailPath._EXTENSION)
-    
-    @staticmethod
-    def from_filename_with_extension(filename_with_extension):
-        '''
-        limited to one dot in filename
-        '''
-        tokens = filename_with_extension.split('.')
-        if len(tokens) == 1:
-            filename = filename_with_extension
-            extension = ''
-        else:
-            filename = '.'.join(tokens[:-1])
-            extension = '.'.join(['', tokens[-1]])
-        return ThumbnailPath(thumbnail_folder(), filename, extension)
-
-    @staticmethod
-    def from_unique_random_name():
-        thumbnail_path = ThumbnailPath._from_random_name()
-        if thumbnail_path.exists():
-            return ThumbnailPath.from_unique_random_name()
-        return thumbnail_path
-    
-    @staticmethod
-    def _from_random_name():
-        number = b2a_hex(os.urandom(15))
-        filename = 'thumbnail_picture_' + str(number)
-        return ThumbnailPath.from_filename(filename)
-    
-    @property
-    def filename_with_extension(self):
-        return self.filename + self.extension
-    
-    @property
-    def full_filename(self):
-        return os.path.join(self.folder, self.filename_with_extension)
-    
-    def exists(self):
-        return os.path.exists(self.full_filename)
-    
-    def remove(self):
-        if os.path.exists(self.full_filename):
-            os.remove(self.full_filename)
diff --git a/ckanext/odsh/plugin.py b/ckanext/odsh/plugin.py
index f0e70e3d..9217d800 100644
--- a/ckanext/odsh/plugin.py
+++ b/ckanext/odsh/plugin.py
@@ -168,10 +168,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
                 tk.get_validator('ignore_missing'),
                 tk.get_converter('convert_to_extras')
             ],
-            'thumbnail': [
-                tk.get_validator('ignore_missing'),
-                tk.get_converter('convert_to_extras')
-            ],
             'relatedPackage': [
                 tk.get_validator('validate_relatedPackage'),
                 tk.get_converter('convert_to_extras')
@@ -197,10 +193,6 @@ class OdshPlugin(p.SingletonPlugin, DefaultTranslation, tk.DefaultDatasetForm):
                 tk.get_converter('convert_from_extras'),
                 tk.get_validator('ignore_missing')
             ],
-            'thumbnail': [
-                tk.get_converter('convert_from_extras'),
-                tk.get_validator('ignore_missing')
-            ],
             'relatedPackage': [
                 tk.get_converter('convert_from_extras'),
                 tk.get_validator('ignore_missing')
diff --git a/ckanext/odsh/templates/snippets/package_item.html b/ckanext/odsh/templates/snippets/package_item.html
index f708758e..4674f7f4 100644
--- a/ckanext/odsh/templates/snippets/package_item.html
+++ b/ckanext/odsh/templates/snippets/package_item.html
@@ -29,7 +29,6 @@ Example:
 {% set daterange = h.get_daterange_prettified(package) %}
 {% set language_of_package = h.get_language_of_package(package) %}
 {% set language_icon = h.get_language_icon(package) %}
-{% set thumbnail = package.get('thumbnail') %}
 
 {% block package_item %}
   <div class="odsh-dataset-item">
diff --git a/ckanext/odsh/tools.py b/ckanext/odsh/tools.py
index b3cbd441..0a2605f0 100644
--- a/ckanext/odsh/tools.py
+++ b/ckanext/odsh/tools.py
@@ -2,6 +2,6 @@ import os
 import logging
-from ckanext.odsh.pdf_to_thumbnail.thumbnail import get_resource_path
 from ckanext.odsh.lib.uploader import calculate_hash
+from ckan.common import config
 import ckan.plugins.toolkit as toolkit
 
 log = logging.getLogger(__name__)
@@ -21,7 +21,7 @@ def add_resource_attributes(context, resource):
     # Mark the resource as processed for this cycle (in context, not on the resource)
     context['resource_processed'] = True
 
-    path = get_resource_path(resource)
+    path = _get_resource_path(resource)
     
     # Check if the path exists and is a file
     if os.path.isfile(path):
@@ -48,3 +48,22 @@ def add_resource_attributes(context, resource):
             toolkit.abort(500, f"Error processing resource at {path}: {str(e)}")
     else:
         toolkit.abort(404, f"File not found: {path}")
+
+
+def _get_resource_path(resource):
+    """Return the filesystem path of an uploaded resource file.
+
+    The path is built as
+    ``<ckan.storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>``,
+    where ``id`` is the value of ``resource['id']``.
+    """
+    # see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
+    resource_id = resource.get('id')
+    filepath = os.path.join(
+        config.get('ckan.storage_path'),
+        'resources',
+        resource_id[0:3],
+        resource_id[3:6],
+        resource_id[6:]
+    )
+    return filepath
diff --git a/setup.py b/setup.py
index 545bc06b..00ee552e 100755
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,6 @@ setup(
         odsh_autocomplete=ckanext.odsh.plugin_odsh_autocomplete:OdshAutocompletePlugin
         odsh_dcat_harvest=ckanext.odsh.plugin_odsh_dcat_harvest:OdshDCATHarvestPlugin
         odsh_collections=ckanext.odsh.collection.plugin:CollectionsPlugin
-        thumbnail=ckanext.odsh.pdf_to_thumbnail.plugin:ThumbnailPlugin
         
         [paste.paster_command]
         odsh_initialization = ckanext.odsh.commands.initialization:Initialization
-- 
GitLab