diff --git a/CHANGELOG.md b/CHANGELOG.md index 93110a947762390c59573e33e4a16e4c2619245d..4b1ccf60d7a809db0ea3f6064f56c6a9667328cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed Travis CI `bin/` directory, along with all associated Travis CI configuration files and scripts. - Removed the functionality that retrieved the current commit hash of the checked out Git repository for the extension. The commit hash was previously inserted into a custom API resource to provide external developers repository information. - Removed `ckanext.odsh.home` configuration variable. +- Removed `ckanext.odsh.upload_formats` configuration variable. Upload formats are now populated by EU-approved file formats. ### Changed - Reorganized default mapping and resource files for improved organization and maintenance. Mapping and config files (e.g., file formats, licenses subject mappings, etc.) are now located in `ckanext/odsh/resources/`. +- Refactored the distribution/resource editing form to replace the free-text format field with a select field populated by EU-approved file formats. ## [2.0.0] - 2023-06-08 diff --git a/README.md b/README.md index c93ec16e981f77372152a61ce03f022b95d1221c..c3a94e5e6438ea15bde708d4f1495bf04cf38b4f 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,6 @@ Parameter | Type | Default | Description `ckanext.odsh.subject_mapping` | `string` | `/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/resources/subject_mapping.json` | Absolute path to subject mapping file. `ckanext.odsh.spatial.mapping` | `string` | `/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/resources/schleswig-holstein_geojson.csv` | Absolute path to spatial mapping file. The mapping file is expected to be a tab-separated file with three columns: URI, spatial text, and JSON geometry. 
def odsh_resource_formats(as_simple_list=False):
    """
    Return the EU file-type codes listed in the resource formats RDF file.

    The file path is read from the
    ``ckanext.odsh.resource_formats_fallback_filepath`` config option and
    defaults to ``resources/fileformats.rdf`` inside the ``ckanext.odsh``
    package. The codes are extracted from the ``rdf:about`` attributes with
    a regular expression (no RDF parser is involved), de-duplicated and
    sorted alphabetically.

    Args:
        as_simple_list: When true, return a plain list of code strings
            instead of a list of dictionaries.

    Returns:
        By default a list of ``{'key': code, 'value': code}`` dictionaries;
        with ``as_simple_list`` a list of the code strings. Both are sorted
        by code.

    Raises:
        OSError: If the RDF file cannot be opened.
    """
    # Default to the RDF file shipped in the extension's resources directory.
    extension_path = pkg_resources.resource_filename('ckanext.odsh', '')
    filepath = config.get(
        'ckanext.odsh.resource_formats_fallback_filepath',
        extension_path + '/resources/fileformats.rdf')

    with open(filepath, 'r', encoding='utf-8') as file:
        rdf_content = file.read()

    # The dots of the host name are escaped so that they only match a
    # literal '.'; left unescaped they would match any character.
    pattern = (
        r'<rdf:Description rdf:about="http://publications\.europa\.eu'
        r'/resource/authority/file-type/(\w+)">'
    )

    # A set drops codes that are described more than once in the file, so
    # each one is returned only once.
    codes = sorted(set(re.findall(pattern, rdf_content)))

    if as_simple_list:
        return codes

    return [{'key': code, 'value': code} for code in codes]
odsh_helpers.odsh_encodeurl, 'odsh_extract_error': odsh_helpers.odsh_extract_error, 'odsh_extract_error_new': odsh_helpers.odsh_extract_error_new, diff --git a/ckanext/odsh/templates/package/snippets/resource_form.html b/ckanext/odsh/templates/package/snippets/resource_form.html index 81983e4bdbe12c55a85057acef83cc2614de70ba..678d05a0c0aa1c4e1366699fef7a4ed1c6a63a25 100644 --- a/ckanext/odsh/templates/package/snippets/resource_form.html +++ b/ckanext/odsh/templates/package/snippets/resource_form.html @@ -61,16 +61,12 @@ {% endif %} - {% block basic_fields_format %} + {% block basic_fields_format %} - {% set format_attrs = {'data-module': 'odsh_guessformat', 'data-module-formats':h.odsh_upload_known_formats()} %} - {% set format_attrs = {} %} - {% set error_string = _('odsh_resource_format_error_label') if errors.format %} - - {% call form.input('format', id='field-format', label=_('Format'), placeholder=_('eg. CSV, XML or JSON'), - value=data.format, error=error_string, is_required=true, classes=['control-full'],attrs=format_attrs) %} - {% endcall %} - {% endblock %} + {% set error_string = _('odsh_resource_format_error_label') if errors.format %} + {% set format_attrs = {'data-module': 'odsh_guessformat', 'data-module-formats':h.odsh_resource_formats(true)} %} + {{ form.tpsh_select('format', id='field-format', label=_('Format'), selected=data.format, options=h.odsh_resource_formats(), is_required=True, error=error_string, attrs=format_attrs, classes=['control-full'])}} + {% endblock %} {% endblock %} <div class='row-fluid'> <div class="span6"> @@ -81,8 +77,7 @@ <button class="btn btn-arrow-right" name="save" value="go-dataset" type="submit">{{ _('Back') }}</button> {% endblock %} {% block again_button %} - <button class="btn float-right btn-arrow-right" name="save" value="again" type="submit">{{_('Another - dataset')}}</button> + <button class="btn float-right btn-arrow-right" name="save" value="again" type="submit">{{_('Another dataset')}}</button> {% endblock %} 
{% endif %} </div> diff --git a/ckanext/odsh/tests/test_env.py b/ckanext/odsh/tests/test_env.py index 9ad5a3942d5e914c0bb9d8510e09dec5b890163b..a17ddffa308ce409a37210bf66067647cee7e5f4 100644 --- a/ckanext/odsh/tests/test_env.py +++ b/ckanext/odsh/tests/test_env.py @@ -105,7 +105,6 @@ class TestEnv: checkConfig('ckanext-archiver.user_agent_string', 'Open Data Schleswig-Holstein') checkConfig('ckan.harvest.mq.type', 'redis') - checkConfig('ckanext.odsh.upload_formats', minLength=2) checkConfig('ckanext.spatial.search_backend', 'solr-spatial-field') checkConfig('ckanext.spatial.common_map.type', 'wms') checkConfig('ckanext.spatial.common_map.wms.url', diff --git a/ckanext/odsh/validation.py b/ckanext/odsh/validation.py index cb0574e40401bf6f3617e232d518157757369bc8..9b329f0317073eb21ccfb07020b21e8da8f3dc01 100644 --- a/ckanext/odsh/validation.py +++ b/ckanext/odsh/validation.py @@ -14,6 +14,7 @@ import ckan.model as model from ckan.lib.navl.dictization_functions import Missing from ckanext.odsh.helpers_tpsh import get_package_dict +from ckanext.odsh.helpers import odsh_resource_formats import ckan.plugins.toolkit as tk import pkg_resources @@ -367,7 +368,7 @@ def validate_formats(data, errors): if not data: raise toolkit.Invalid('Missing format.') - if not data.upper() in 
['7Z','AAB','AAC','AKN4EU','AKN4EU_ZIP','APK','APPX','ARC','ARCINFO_COV','ARC_GZ','ARJ','ATOM','AZW','BIN','BITS','BMP','BWF','BZIP2','CSS','CSV','DAPK','DBF','DCR','DEB','DGN','DMG','DMP','DOC','DOCX','DTD_SGML','DTD_XML','DWG','DXF','E00','EAR','ECW','EFORMS_XML','EPS','EPUB','ETSI_XML','EXE','FMX2','FMX3','FMX4','FMX4_ZIP','GDB','GEOJSON','GEOTIFF','GIF','GML','GMZ','GPKG','GRID','GRID_ASCII','GTFS','GZIP','HDF','HDT','HTML','HTML5','HTML_SIMPL','ICS','IMMC_XML','INDD','IPA','ISO','ISO_ZIP','JAR','JATS','JPEG','JPEG2000','JS','JSON','JSON_LD','KML','KMZ','LAS','LAZ','LEG','LHA','LPK','LZIP','LZMA','LZO','MAP_PRVW','MAP_SRVC','MBOX','MDB','METS','METS_ZIP','MHTML','MIF_MID','MOBI','MOP','MP3','MPEG2','MPEG4','MPEG4_AVC','MRSID','MSG_HTTP','MSI','MXD','N3','NETCDF','OAPK','OCTET','ODB','ODC','ODF','ODG','ODP','ODS','ODT','OP_DATPRO','OVF','OWL','PDF','PDF1X','PDFA1A','PDFA1B','PDFA2A','PDFA2B','PDFA3','PDFUA','PDFX','PDFX1A','PDFX2A','PDFX4','PL','PNG','PPS','PPSX','PPT','PPTX','PS','PSD','PWP','QGS','RAR','RDF','RDFA','RDF_N_QUADS','RDF_N_TRIPLES','RDF_THRIFT','RDF_TRIG','RDF_TRIX','RDF_TURTLE','RDF_XML','REST','RPM','RSS','RTF','SB3','SCHEMA_XML','SDMX','SGML','SHP','SKOS_XML','SPARQLQ','SPARQLQRES','SQL','STL','SVG','SWM','TAB','TAB_RSTR','TAR','TAR_GZ','TAR_XZ','TIFF','TIFF_FX','TMX','TSV','TXT','UNGEN','WAR','WARC','WARC_GZ','WAV','WCS_SRVC','WEBP','WFS_SRVC','WIM','WMS_SRVC','WMTS_SRVC','WORLD','XHTML','XHTML5','XHTML_SIMPL','XLIFF','XLS','XLSB','XLSM','XLSX','XML','XSLFO','XSLT','XYZ','XZ','YAML','Z','ZIP']: + if not any(data.upper() == obj['key'] for obj in odsh_resource_formats()): raise toolkit.Invalid(_('Only formats on the list of the EU Publications Office are allowed.')) return data