Skip to content
Snippets Groups Projects
Commit 0bd7130a authored by Jesper Zedlitz's avatar Jesper Zedlitz
Browse files

pdf_to_thumbnail aus Transparenzportal

parent b14a9938
Branches
Tags
1 merge request!17Stage System soll in Zukunft Master Branch erhalten
...@@ -4,8 +4,7 @@ import ckan.lib.helpers as helpers ...@@ -4,8 +4,7 @@ import ckan.lib.helpers as helpers
from ckan.logic.action.update import package_update from ckan.logic.action.update import package_update
from ckan.logic.action.delete import package_delete from ckan.logic.action.delete import package_delete
#from thumbnail import thumbnail
import thumbnail as thumbnail
def before_package_delete(context, package_id_dict): def before_package_delete(context, package_id_dict):
...@@ -23,7 +22,7 @@ def before_package_update(context, pkg_dict): ...@@ -23,7 +22,7 @@ def before_package_update(context, pkg_dict):
old_filename = package.get('thumbnail') old_filename = package.get('thumbnail')
if old_filename: if old_filename:
if str(old_private) != str(new_private): if str(old_private) != str(new_private):
new_filename = thumbnail.change_filepath(old_filename) new_filename = thumbnail.rename_thumbnail_to_random_name(old_filename)
pkg_dict['extras'].append({'key': 'thumbnail', 'value': new_filename}) pkg_dict['extras'].append({'key': 'thumbnail', 'value': new_filename})
elif not pkg_dict.get('thumbnail'): elif not pkg_dict.get('thumbnail'):
pkg_dict['extras'].append({'key': 'thumbnail', 'value': old_filename}) pkg_dict['extras'].append({'key': 'thumbnail', 'value': old_filename})
......
import os import os
#from ckan #from ckan
import ckan.plugins as plugins import ckan.plugins as plugins
...@@ -22,14 +21,15 @@ class ThumbnailPlugin(plugins.SingletonPlugin): ...@@ -22,14 +21,15 @@ class ThumbnailPlugin(plugins.SingletonPlugin):
#IResourceController #IResourceController
def after_create(self, context, resource): def after_create(self, context, resource):
_, filename = thumbnail.create_thumbnail(context, resource) resources = thumbnail.resources_of_containing_package(resource)
thumbnail.write_thumbnail_into_package(context, resource, filename) thumbnail.create_thumbnail_if_none_in_package(context, resources)
def after_update(self, context, resource): def after_update(self, context, resource):
thumbnail.check_and_create_thumbnail_after_update(context, resource) resources = thumbnail.resources_of_containing_package(resource)
thumbnail.create_thumbnail_if_none_in_package(context, resources)
def after_delete(self, context, resources): def after_delete(self, context, resources):
thumbnail.create_thumbnail_for_last_resource(context, resources) thumbnail.create_thumbnail_if_none_in_package(context, resources)
#IConfigurer #IConfigurer
......
...@@ -7,7 +7,7 @@ from ckan.common import config ...@@ -7,7 +7,7 @@ from ckan.common import config
import urllib2 import urllib2
import requests import requests
import binascii from binascii import b2a_hex
import ckan.plugins.toolkit as toolkit import ckan.plugins.toolkit as toolkit
import ckan.logic as logic import ckan.logic as logic
#from extension #from extension
...@@ -16,155 +16,212 @@ import ckan.logic as logic ...@@ -16,155 +16,212 @@ import ckan.logic as logic
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def get_filename_from_context(context): def create_thumbnail(context, resource):
'''
main entry point into this module
this function is called from pdf_to_thumbnail.plugin
'''
old_filename = _get_filename_from_context(context)
url_type = resource.get('url_type')
if url_type == 'upload':
is_PDF, filename = _create_thumbnail_from_memory(resource, old_filename)
else:
is_PDF, filename = (False, None)
return is_PDF, filename
def _get_filename_from_context(context):
package = context.get('package') package = context.get('package')
package_id = package.id package_id = package.id
package= toolkit.get_action('package_show')(context, {'id': package_id}) package= toolkit.get_action('package_show')(None, {'id': package_id})
thumbnail = package.get('thumbnail') thumbnail = package.get('thumbnail')
return thumbnail return thumbnail
def get_filepath_for_thumbnail(filename):
if filename:
return config.get('ckan.storage_path') + "/thumbnail/" + filename
return config.get('ckan.storage_path') + "/thumbnail/"
def concatenate_filename(filename): def _create_thumbnail_from_memory(resource, old_filename):
return filename + ".jpg" filepath = get_resource_path(resource)
is_PDF = _is_pdf(filepath)
if is_PDF:
with open(filepath, 'rb') as file:
new_filename = _create_thumbnail_from_file(file)
if old_filename:
ThumbnailPath.from_filename(old_filename).remove()
return is_PDF, new_filename
else:
return is_PDF, None
def get_filepath_to_resource(resource): def get_resource_path(resource):
# see https://stackoverflow.com/questions/46572402/where-does-ckan-store-the-files-pushed-to-datastore-filestore
resource_id = resource.get('id') resource_id = resource.get('id')
directory = config.get('ckan.storage_path') + '/resources/' filepath = os.path.join(
#looked up how resources are saved, by locating the keyword resources in the OS config.get('ckan.storage_path'),
path = directory + resource_id[0:3] + '/' + resource_id[3:6] + '/' + resource_id[6:] 'resources',
return path resource_id[0:3],
resource_id[3:6],
resource_id[6:]
)
return filepath
def random_filename():
number = binascii.b2a_hex(os.urandom(15)) def _is_pdf(filepath):
filename = 'thumbnail_picture_' + str(number) file_type = magic.from_file(filepath, mime = True)
full_filename = concatenate_filename(filename) return file_type == 'application/pdf'
filepath = get_filepath_for_thumbnail(full_filename)
if os.path.exists(filepath):
filename = random_filename()
return filename
def change_filepath(old_filename):
old_filepath = get_filepath_for_thumbnail(old_filename)
new_filename = concatenate_filename(random_filename())
new_filepath = get_filepath_for_thumbnail(new_filename)
try:
os.renames(old_filepath, new_filepath)
return new_filename
except OSError:
log.warning('The file path "{}" of package was not found.'.format(old_filepath))
def create_thumbnail_from_file(file, old_filename): def _create_thumbnail_from_file(file):
width = config.get('ckan.thumbnail.size.width', 410) width = config.get('ckan.thumbnail.size.width', 410)
filename = random_filename() new_thumbnail = ThumbnailPath.from_unique_random_name()
file.seek(0) file.seek(0)
file_read = file.read() file_read = file.read()
directory = get_filepath_for_thumbnail('') convert_from_bytes(
if old_filename: file_read,
old_filepath = get_filepath_for_thumbnail(concatenate_filename(old_filename))
if os.path.exists(old_filepath):
os.remove(old_filepath)
convert_from_bytes(file_read,
size=(width, None), size=(width, None),
output_folder=directory, output_folder=new_thumbnail.folder,
output_file=filename, output_file=new_thumbnail.filename,
single_file=True, single_file=True,
first_page=0, first_page=0,
last_page=0, last_page=0,
fmt='jpg' fmt='jpg'
) )
return concatenate_filename(filename) return new_thumbnail.filename_with_extension
def create_thumbnail_from_url(resource, old_filename): def thumbnail_folder():
resource_url = resource.get('url') return os.path.join(
request = urllib2.Request(resource_url) config.get('ckan.storage_path'),
response = urllib2.urlopen(request, timeout = 100000) 'thumbnail',
)
if response.code == 200:
filetowrite = response.read()
# function is set to private in ckanext.odsh.lib.uploader
# raise_validation_error_if_virus_found(filetowrite, response.read())
file_type = magic.from_buffer(response.read(), mime = True)
header = response.headers
resource_size = header.get('Content-Length')
max_available_memory = config.get('ckan.max_available_memory', 250000000) #In Bytes ca. 250 MB
with tempfile.SpooledTemporaryFile(max_size=max_available_memory) as file:
file.write(filetowrite)
new_filename = create_thumbnail_from_file(file, old_filename) def rename_thumbnail_to_random_name(old_filename):
return True, new_filename '''
used by pdf_to_thumbnail.action
'''
old_filepath = ThumbnailPath.from_filename_with_extension(old_filename)
new_filepath = ThumbnailPath.from_unique_random_name()
try:
os.renames(old_filepath.full_filename, new_filepath.full_filename)
return new_filepath.filename_with_extension
except OSError:
log.warning('The file path "{}" of package was not found.'.format(old_filepath))
def create_thumbnail_from_memory(resource, old_filename):
path = get_filepath_to_resource(resource)
file_type = magic.from_file(path, mime = True)
if file_type == 'application/pdf':
with open(path, 'rb') as file:
new_filename = create_thumbnail_from_file(file, old_filename)
is_PDF = True
return is_PDF, new_filename
else:
is_PDF = False
return is_PDF, None
def remove_thumbnail(context): def remove_thumbnail(context):
old_filename = get_filename_from_context(context) '''
used by pdf_to_thumbnail.action
'''
old_filename = _get_filename_from_context(context)
if old_filename: if old_filename:
old_filepath = get_filepath_for_thumbnail(old_filename) ThumbnailPath.from_filename_with_extension(old_filename).remove()
if os.path.exists(old_filepath):
os.remove(old_filepath)
def create_thumbnail(context, resource):
log.debug('create_thumbnail')
old_filename = get_filename_from_context(context)
url_type = resource.get('url_type')
if url_type == 'upload':
is_PDF, filename = create_thumbnail_from_memory(resource, old_filename)
else:
is_PDF, filename = create_thumbnail_from_url(resource, old_filename)
return is_PDF, filename
def check_and_create_thumbnail_after_update(context, resource): def resources_of_containing_package(resource):
log.debug('check_and_create_thumbnail_after_update') #todo: change arg order
'''
used by pdf_to_thumbnail.plugin
'''
package_id = resource.get('package_id') package_id = resource.get('package_id')
package = toolkit.get_action('package_show')(context, {'id': package_id}) package = toolkit.get_action('package_show')(None, {'id': package_id})
resources = package.get('resources') resources = package.get('resources')
if len(resources) > 0: return resources
last_resource = resources.pop()
last_resource_id = last_resource.get('id')
resource_id = resource.get('id') def create_thumbnail_if_none_in_package(context, resources):
if last_resource_id == resource_id and resource.get('url_type') != 'upload': '''
used by pdf_to_thumbnail.plugin
loops through a package's resources in the order they have been uploaded
and for each tries to create a thumbnail until it succeeds.
If the package already has a thumbnail the creation step is skipped
'''
package_dict = _get_package_dict_from_context(context)
if not _has_thumbnail(package_dict):
any(_try_create_thumbnail(context, r) for r in resources)
def _get_package_dict_from_context(context):
package_id = context.get('package').id
package_dict = toolkit.get_action('package_show')(None, {'id': package_id})
return package_dict
def _has_thumbnail(package_dict):
thumbnail = package_dict.get('thumbnail')
return bool(thumbnail)
def _try_create_thumbnail(context, resource):
is_PDF, filename = create_thumbnail(context, resource) is_PDF, filename = create_thumbnail(context, resource)
if is_PDF: success = is_PDF
write_thumbnail_into_package(context, resource, filename) if success:
_write_thumbnail_into_package(context, filename)
return success
def create_thumbnail_for_last_resource(context, resources): def _write_thumbnail_into_package(context, filename):
if len(resources) > 0: package_dict = _get_package_dict_from_context(context)
last_resource = resources.pop() if filename:
is_PDF, filename = create_thumbnail(context, last_resource) package_dict.update({'thumbnail': filename})
if not is_PDF: toolkit.get_action('package_update')(None, package_dict)
create_thumbnail_for_last_resource(context, resources)
else:
write_thumbnail_into_package(context, last_resource, filename) class ThumbnailPath(object):
'''
utility class to manage the path of thumbnail pictures
'''
def __init__(self, folder, filename, extension):
self.folder = folder
self.filename = filename
self.extension = extension
_EXTENSION = '.jpg'
@staticmethod
def from_filename(filename):
'''
filename without extension (i.e. '.jpg')
'''
return ThumbnailPath(thumbnail_folder(), filename, ThumbnailPath._EXTENSION)
@staticmethod
def from_filename_with_extension(filename_with_extension):
'''
limited to one dot in filename
'''
tokens = filename_with_extension.split('.')
if len(tokens) == 1:
filename = filename_with_extension
extension = ''
else: else:
remove_thumbnail(context) filename = '.'.join(tokens[:-1])
package = context.get('package') extension = '.'.join(['', tokens[-1]])
package_id = package.id return ThumbnailPath(thumbnail_folder(), filename, extension)
package= toolkit.get_action('package_show')(context, {'id': package_id})
package.update({'thumbnail': None}) @staticmethod
toolkit.get_action('package_update')(context, package) def from_unique_random_name():
thumbnail_path = ThumbnailPath._from_random_name()
if thumbnail_path.exists():
return ThumbnailPath.from_unique_random_name()
return thumbnail_path
@staticmethod
def _from_random_name():
number = b2a_hex(os.urandom(15))
filename = 'thumbnail_picture_' + str(number)
return ThumbnailPath.from_filename(filename)
def write_thumbnail_into_package(context, resource, filename): @property
package_id = resource.get('package_id') def filename_with_extension(self):
package = toolkit.get_action('package_show')(context, {'id': package_id}) return self.filename + self.extension
if filename:
package.update({'thumbnail': filename}) @property
toolkit.get_action('package_update')(context, package) def full_filename(self):
return os.path.join(self.folder, self.filename_with_extension)
def exists(self):
return os.path.exists(self.full_filename)
def remove(self):
if os.path.exists(self.full_filename):
os.remove(self.full_filename)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment