from ckanext.dcatde.profiles import DCATdeProfile, DCATDE, DCAT, VCARD, dcat_theme_prefix , DCATDE_1_0
from ckanext.dcat.utils import resource_uri
from ckanext.dcat.profiles import EuropeanDCATAPProfile, DCT, URIRefOrLiteral
from ckan.model.license import LicenseRegister
import rdflib
import ckanext.dcatde.dataset_utils as ds_utils
import logging
from ckan.plugins import toolkit
from ckan.common import config, json

import sys
if sys.version_info[0] == 2:
    import urllib2
elif sys.version_info[0] == 3:  # >=Python3.1
    import urllib
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly.
    import urllib.request

log = logging.getLogger(__name__)
# These two assignments rebind the DCT / DCAT names imported above.
DCT = rdflib.namespace.Namespace("http://purl.org/dc/terms/")
DCAT = rdflib.namespace.Namespace("http://www.w3.org/ns/dcat#")

# Plain text type: ``unicode`` on Python 2, ``str`` on Python 3.
_TEXT_TYPE = type(u'')

# URL used when 'ckan.odsh.resource_formats_url' is not configured.
_FORMATS_FALLBACK_URL = "http://publications.europa.eu/resource/authority/file-type"
# Local copy of the last successfully downloaded file-type vocabulary.
_FORMATS_BACKUP_PATH = '/usr/lib/ckan/default/src/ckanext-odsh/ckanext/odsh/fileformats.rdf'
# The original author counted 143 listed file types; a graph with no more
# than this many subjects is treated as a broken download.
_MIN_EXPECTED_FORMATS = 120
# Mapping file used when 'ckanext.odsh.language.mapping' is not configured.
_LANGUAGES_FALLBACK_PATH = '/usr/lib/ckan/default/src/ckanext-odsh/languages.json'


def _to_text(term):
    '''Return *term* as a plain text string.

    rdflib terms are text subclasses; converting them gives a value that can
    be used as a key of the plain-string lookup tables in this module, and it
    replaces the former ``term.decode()`` calls, which do not exist on
    Python 3 strings.
    '''
    return _TEXT_TYPE(term)


def _replace_object(graph, subject, predicate, old_object, new_object):
    '''Swap exactly one triple's object, leaving sibling values untouched.

    ``Graph.set()`` is not used because it removes every object stored for
    ``(subject, predicate)``, which drops the other values of a multi-valued
    property.
    '''
    graph.remove((subject, predicate, old_object))
    graph.add((subject, predicate, new_object))


class ODSHEuropeanDCATAPProfile(EuropeanDCATAPProfile):
    # NOTE(review): only the two methods below are visible in this excerpt;
    # any other members of the class are not shown here.

    def _distribution_format(self, distribution, normalize_ckan_format=True):
        '''Return ``(imt, label)`` for *distribution* with a short label.

        Delegates to the parent profile. If the returned label is a key of
        the table from ``resource_formats_import()`` (full URI -> last path
        segment), the mapped short code is returned instead of the label.
        '''
        imt, label = super(ODSHEuropeanDCATAPProfile, self)._distribution_format(
            distribution, normalize_ckan_format)
        import_table = resource_formats_import()
        if label in import_table:
            label = import_table[label]
        return imt, label

    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''Build the graph via the parent profile, then rewrite two properties.

        * ``dct:format`` objects of ``dcat:Distribution`` nodes whose text is
          a key of ``resource_formats_export()`` become the mapped URI.
        * ``dct:language`` objects whose text is a key of ``get_language()``
          become the mapped URI; other literals that ``URIRefOrLiteral``
          classifies as a URI are turned into a ``URIRef`` of the same text.
        '''
        super(ODSHEuropeanDCATAPProfile, self).graph_from_dataset(dataset_dict, dataset_ref)
        graph = self.g

        export_table = resource_formats_export()
        # The matches are copied into lists first because the graph is
        # modified inside the loops.
        for distribution in list(graph.subjects(rdflib.RDF.type, DCAT.Distribution)):
            for format_node in list(graph.objects(distribution, DCT['format'])):
                format_uri = export_table.get(_to_text(format_node))
                if format_uri is not None:
                    _replace_object(graph, distribution, DCT['format'],
                                    format_node, rdflib.URIRef(format_uri))

        languages = get_language()
        for subject, predicate, language_node in list(graph.triples((None, DCT.language, None))):
            language_text = _to_text(language_node)
            if language_text in languages:
                replacement = rdflib.URIRef(languages[language_text])
            elif (isinstance(language_node, rdflib.Literal)
                    and isinstance(URIRefOrLiteral(language_text), rdflib.URIRef)):
                replacement = rdflib.URIRef(language_text)
            else:
                continue
            _replace_object(graph, subject, predicate, language_node, replacement)


_RESOURCE_FORMATS_IMPORT = None
_RESOURCE_FORMATS_EXPORT = None


def _open_url(url):
    '''Open *url* with the urllib flavour of the running Python version.'''
    if sys.version_info[0] == 2:
        return urllib2.urlopen(urllib2.Request(url))
    return urllib.request.urlopen(urllib.request.Request(url))


def _require_enough_formats(graph):
    '''Raise ``ValueError`` unless *graph* has more than the minimum subjects.'''
    found = len(set(graph.subjects()))
    if found <= _MIN_EXPECTED_FORMATS:
        raise ValueError('Only %d file formats found, expected more than %d'
                         % (found, _MIN_EXPECTED_FORMATS))


def _fetch_formats(url):
    '''Download and parse the vocabulary; return ``(graph, raw_payload)``.'''
    response = _open_url(url)
    try:
        payload = response.read()
    finally:
        response.close()
    graph = rdflib.Graph()
    # The format is given explicitly instead of relying on rdflib's default.
    graph.parse(data=payload, format='xml')
    _require_enough_formats(graph)
    return graph, payload


def _read_formats_backup(path):
    '''Parse the local backup copy into a fresh graph and validate it.'''
    graph = rdflib.Graph()
    graph.parse(path, format='xml')
    _require_enough_formats(graph)
    return graph


def _write_formats_backup(path, payload):
    '''Store *payload* as backup; a write failure is logged, not raised.'''
    try:
        # Binary mode: the payload is the raw bytes read from the response.
        with open(path, 'wb') as backup_file:
            backup_file.write(payload)
    except (IOError, OSError) as err:
        log.warning("Could not save file formats backup to %s: %s", path, err)


def resource_formats():
    '''Load the file-type vocabulary and fill both module-level lookup tables.

    The URL is read from the config setting 'ckan.odsh.resource_formats_url'
    (with a built-in fallback URL). The vocabulary is downloaded once; on
    success the raw download is also saved as a local backup. If the download
    or its validation fails, the local backup is used instead.

    For every subject URI whose last path segment is not 'file-type':
    ``_RESOURCE_FORMATS_EXPORT[segment] = uri`` and
    ``_RESOURCE_FORMATS_IMPORT[uri] = segment``. The tables are only replaced
    after loading succeeded.

    Raises:
        Exception: if neither the online source nor the backup yields a
            graph with more than 120 subjects.
    '''
    global _RESOURCE_FORMATS_IMPORT
    global _RESOURCE_FORMATS_EXPORT

    format_european_url = config.get('ckan.odsh.resource_formats_url')
    if not format_european_url:
        log.warning("Could not find config setting: 'ckan.odsh.resource_formats_url', using fallback instead.")
        format_european_url = _FORMATS_FALLBACK_URL

    # At first try to get the current file list online.
    try:
        graph, payload = _fetch_formats(format_european_url)
    except Exception as online_error:
        # Something went wrong online; try to use the backup instead.
        log.warning("Could not load file formats from %s (%s), trying backup %s",
                    format_european_url, online_error, _FORMATS_BACKUP_PATH)
        try:
            graph = _read_formats_backup(_FORMATS_BACKUP_PATH)
        except Exception:
            log.exception("Could not load file formats backup from %s", _FORMATS_BACKUP_PATH)
            raise Exception("Could not get file formats from " + format_european_url)
    else:
        _write_formats_backup(_FORMATS_BACKUP_PATH, payload)

    import_table = {}
    export_table = {}
    # Sorted so that, should two URIs share a last segment, the winner is
    # deterministic (the later one in sort order).
    for uri in sorted(set(_to_text(subject) for subject in graph.subjects())):
        short_code = uri.split('/')[-1]
        if short_code != 'file-type':
            export_table[short_code] = uri
            import_table[uri] = short_code

    _RESOURCE_FORMATS_IMPORT = import_table
    _RESOURCE_FORMATS_EXPORT = export_table


def resource_formats_export():
    '''Return the short-code -> URI table, loading it on first use.'''
    if not _RESOURCE_FORMATS_EXPORT:
        resource_formats()
    return _RESOURCE_FORMATS_EXPORT


def resource_formats_import():
    '''Return the URI -> short-code table, loading it on first use.'''
    if not _RESOURCE_FORMATS_IMPORT:
        resource_formats()
    return _RESOURCE_FORMATS_IMPORT


_LANGUAGES = None


def get_language():
    '''Return the language mapping table, loading it on first use.

    When datasets are exported in RDF format, their language tag should be
    given as
    "<dct:language rdf:resource="http://publications.europa.eu/.../XXX"/>",
    where XXX represents the language according to the ISO 639-3 standard.
    However, some imported datasets represent their language as
    "<dct:language>de</dct:language>", which is interpreted here as an
    ISO 639-1 value. As the language setting is not displayed in the web
    frontend, this function only ensures the correct export format, using
    'languages.json' as mapping table.

    The file path is read from the config setting
    'ckanext.odsh.language.mapping' (with a built-in fallback path). The file
    must contain a JSON list of rows; element 0 of each row becomes the key
    and element 1 the value of the returned dict.

    Raises:
        ValueError: if the mapping file is not valid JSON.
    '''
    global _LANGUAGES
    if _LANGUAGES is None:
        languages_file_path = config.get('ckanext.odsh.language.mapping')
        if not languages_file_path:
            log.warning("Could not find config setting: 'ckanext.odsh.language.mapping', using fallback instead.")
            languages_file_path = _LANGUAGES_FALLBACK_PATH
        with open(languages_file_path) as languages_file:
            try:
                language_mapping_table = json.loads(languages_file.read())
            except ValueError as e:
                # includes simplejson.decoder.JSONDecodeError
                raise ValueError('Invalid JSON syntax in %s: %s' %
                                 (languages_file_path, e))

        # Assigned only after the file was read and parsed successfully, so
        # a failed attempt is retried on the next call.
        _LANGUAGES = {row[0]: row[1] for row in language_mapping_table}

    return _LANGUAGES
"http://publications.europa.eu/resource/authority/language/BAM"], + ["be", "http://publications.europa.eu/resource/authority/language/BEL"], + ["bn", "http://publications.europa.eu/resource/authority/language/BEN"], + ["bi", "http://publications.europa.eu/resource/authority/language/BIS"], + ["bo", "http://publications.europa.eu/resource/authority/language/BOD"], + ["bs", "http://publications.europa.eu/resource/authority/language/BOS"], + ["br", "http://publications.europa.eu/resource/authority/language/BRE"], + ["bg", "http://publications.europa.eu/resource/authority/language/BUL"], + ["ca", "http://publications.europa.eu/resource/authority/language/CAT"], + ["cs", "http://publications.europa.eu/resource/authority/language/CES"], + ["ch", "http://publications.europa.eu/resource/authority/language/CHA"], + ["ce", "http://publications.europa.eu/resource/authority/language/CHE"], + ["cu", "http://publications.europa.eu/resource/authority/language/CHU"], + ["cv", "http://publications.europa.eu/resource/authority/language/CHV"], + ["kw", "http://publications.europa.eu/resource/authority/language/COR"], + ["co", "http://publications.europa.eu/resource/authority/language/COS"], + ["cr", "http://publications.europa.eu/resource/authority/language/CRE"], + ["cy", "http://publications.europa.eu/resource/authority/language/CYM"], + ["da", "http://publications.europa.eu/resource/authority/language/DAN"], + ["de", "http://publications.europa.eu/resource/authority/language/DEU"], + ["dv", "http://publications.europa.eu/resource/authority/language/DIV"], + ["dz", "http://publications.europa.eu/resource/authority/language/DZO"], + ["el", "http://publications.europa.eu/resource/authority/language/ELL"], + ["en", "http://publications.europa.eu/resource/authority/language/ENG"], + ["eo", "http://publications.europa.eu/resource/authority/language/EPO"], + ["et", "http://publications.europa.eu/resource/authority/language/EST"], + ["eu", 
"http://publications.europa.eu/resource/authority/language/EUS"], + ["ee", "http://publications.europa.eu/resource/authority/language/EWE"], + ["fo", "http://publications.europa.eu/resource/authority/language/FAO"], + ["fa", "http://publications.europa.eu/resource/authority/language/FAS"], + ["fj", "http://publications.europa.eu/resource/authority/language/FIJ"], + ["fi", "http://publications.europa.eu/resource/authority/language/FIN"], + ["fr", "http://publications.europa.eu/resource/authority/language/FRA"], + ["fy", "http://publications.europa.eu/resource/authority/language/FRY"], + ["ff", "http://publications.europa.eu/resource/authority/language/FUL"], + ["gd", "http://publications.europa.eu/resource/authority/language/GLA"], + ["ga", "http://publications.europa.eu/resource/authority/language/GLE"], + ["gl", "http://publications.europa.eu/resource/authority/language/GLG"], + ["gv", "http://publications.europa.eu/resource/authority/language/GLV"], + ["gn", "http://publications.europa.eu/resource/authority/language/GRN"], + ["gu", "http://publications.europa.eu/resource/authority/language/GUJ"], + ["ht", "http://publications.europa.eu/resource/authority/language/HAT"], + ["ha", "http://publications.europa.eu/resource/authority/language/HAU"], + ["sh", "http://publications.europa.eu/resource/authority/language/HBS"], + ["he", "http://publications.europa.eu/resource/authority/language/HEB"], + ["hz", "http://publications.europa.eu/resource/authority/language/HER"], + ["hi", "http://publications.europa.eu/resource/authority/language/HIN"], + ["ho", "http://publications.europa.eu/resource/authority/language/HMO"], + ["hr", "http://publications.europa.eu/resource/authority/language/HRV"], + ["hu", "http://publications.europa.eu/resource/authority/language/HUN"], + ["hy", "http://publications.europa.eu/resource/authority/language/HYE"], + ["ig", "http://publications.europa.eu/resource/authority/language/IBO"], + ["io", 
"http://publications.europa.eu/resource/authority/language/IDO"], + ["ii", "http://publications.europa.eu/resource/authority/language/III"], + ["iu", "http://publications.europa.eu/resource/authority/language/IKU"], + ["ie", "http://publications.europa.eu/resource/authority/language/ILE"], + ["ia", "http://publications.europa.eu/resource/authority/language/INA"], + ["id", "http://publications.europa.eu/resource/authority/language/IND"], + ["ik", "http://publications.europa.eu/resource/authority/language/IPK"], + ["is", "http://publications.europa.eu/resource/authority/language/ISL"], + ["it", "http://publications.europa.eu/resource/authority/language/ITA"], + ["jv", "http://publications.europa.eu/resource/authority/language/JAV"], + ["ja", "http://publications.europa.eu/resource/authority/language/JPN"], + ["kl", "http://publications.europa.eu/resource/authority/language/KAL"], + ["kn", "http://publications.europa.eu/resource/authority/language/KAN"], + ["ks", "http://publications.europa.eu/resource/authority/language/KAS"], + ["ka", "http://publications.europa.eu/resource/authority/language/KAT"], + ["kr", "http://publications.europa.eu/resource/authority/language/KAU"], + ["kk", "http://publications.europa.eu/resource/authority/language/KAZ"], + ["km", "http://publications.europa.eu/resource/authority/language/KHM"], + ["ki", "http://publications.europa.eu/resource/authority/language/KIK"], + ["rw", "http://publications.europa.eu/resource/authority/language/KIN"], + ["ky", "http://publications.europa.eu/resource/authority/language/KIR"], + ["kv", "http://publications.europa.eu/resource/authority/language/KOM"], + ["kg", "http://publications.europa.eu/resource/authority/language/KON"], + ["ko", "http://publications.europa.eu/resource/authority/language/KOR"], + ["kj", "http://publications.europa.eu/resource/authority/language/KUA"], + ["ku", "http://publications.europa.eu/resource/authority/language/KUR"], + ["lo", 
"http://publications.europa.eu/resource/authority/language/LAO"], + ["la", "http://publications.europa.eu/resource/authority/language/LAT"], + ["lv", "http://publications.europa.eu/resource/authority/language/LAV"], + ["li", "http://publications.europa.eu/resource/authority/language/LIM"], + ["ln", "http://publications.europa.eu/resource/authority/language/LIN"], + ["lt", "http://publications.europa.eu/resource/authority/language/LIT"], + ["lb", "http://publications.europa.eu/resource/authority/language/LTZ"], + ["lu", "http://publications.europa.eu/resource/authority/language/LUB"], + ["lg", "http://publications.europa.eu/resource/authority/language/LUG"], + ["mh", "http://publications.europa.eu/resource/authority/language/MAH"], + ["ml", "http://publications.europa.eu/resource/authority/language/MAL"], + ["mr", "http://publications.europa.eu/resource/authority/language/MAR"], + ["mk", "http://publications.europa.eu/resource/authority/language/MKD"], + ["mg", "http://publications.europa.eu/resource/authority/language/MLG"], + ["mt", "http://publications.europa.eu/resource/authority/language/MLT"], + ["mn", "http://publications.europa.eu/resource/authority/language/MON"], + ["mi", "http://publications.europa.eu/resource/authority/language/MRI"], + ["ms", "http://publications.europa.eu/resource/authority/language/MSA"], + ["my", "http://publications.europa.eu/resource/authority/language/MYA"], + ["na", "http://publications.europa.eu/resource/authority/language/NAU"], + ["nv", "http://publications.europa.eu/resource/authority/language/NAV"], + ["nr", "http://publications.europa.eu/resource/authority/language/NBL"], + ["nd", "http://publications.europa.eu/resource/authority/language/NDE"], + ["ng", "http://publications.europa.eu/resource/authority/language/NDO"], + ["ne", "http://publications.europa.eu/resource/authority/language/NEP"], + ["nl", "http://publications.europa.eu/resource/authority/language/NLD"], + ["nn", 
"http://publications.europa.eu/resource/authority/language/NNO"], + ["nb", "http://publications.europa.eu/resource/authority/language/NOB"], + ["no", "http://publications.europa.eu/resource/authority/language/NOR"], + ["ny", "http://publications.europa.eu/resource/authority/language/NYA"], + ["oc", "http://publications.europa.eu/resource/authority/language/OCI"], + ["oj", "http://publications.europa.eu/resource/authority/language/OJI"], + ["or", "http://publications.europa.eu/resource/authority/language/ORI"], + ["om", "http://publications.europa.eu/resource/authority/language/ORM"], + ["os", "http://publications.europa.eu/resource/authority/language/OSS"], + ["pa", "http://publications.europa.eu/resource/authority/language/PAN"], + ["pi", "http://publications.europa.eu/resource/authority/language/PLI"], + ["pl", "http://publications.europa.eu/resource/authority/language/POL"], + ["pt", "http://publications.europa.eu/resource/authority/language/POR"], + ["ps", "http://publications.europa.eu/resource/authority/language/PUS"], + ["qu", "http://publications.europa.eu/resource/authority/language/QUE"], + ["rm", "http://publications.europa.eu/resource/authority/language/ROH"], + ["ro", "http://publications.europa.eu/resource/authority/language/RON"], + ["rn", "http://publications.europa.eu/resource/authority/language/RUN"], + ["ru", "http://publications.europa.eu/resource/authority/language/RUS"], + ["sg", "http://publications.europa.eu/resource/authority/language/SAG"], + ["sa", "http://publications.europa.eu/resource/authority/language/SAN"], + ["si", "http://publications.europa.eu/resource/authority/language/SIN"], + ["sk", "http://publications.europa.eu/resource/authority/language/SLK"], + ["sl", "http://publications.europa.eu/resource/authority/language/SLV"], + ["se", "http://publications.europa.eu/resource/authority/language/SME"], + ["sm", "http://publications.europa.eu/resource/authority/language/SMO"], + ["sn", 
"http://publications.europa.eu/resource/authority/language/SNA"], + ["sd", "http://publications.europa.eu/resource/authority/language/SND"], + ["so", "http://publications.europa.eu/resource/authority/language/SOM"], + ["st", "http://publications.europa.eu/resource/authority/language/SOT"], + ["es", "http://publications.europa.eu/resource/authority/language/SPA"], + ["sq", "http://publications.europa.eu/resource/authority/language/SQI"], + ["sc", "http://publications.europa.eu/resource/authority/language/SRD"], + ["sr", "http://publications.europa.eu/resource/authority/language/SRP"], + ["ss", "http://publications.europa.eu/resource/authority/language/SSW"], + ["su", "http://publications.europa.eu/resource/authority/language/SUN"], + ["sw", "http://publications.europa.eu/resource/authority/language/SWA"], + ["sv", "http://publications.europa.eu/resource/authority/language/SWE"], + ["ty", "http://publications.europa.eu/resource/authority/language/TAH"], + ["ta", "http://publications.europa.eu/resource/authority/language/TAM"], + ["tt", "http://publications.europa.eu/resource/authority/language/TAT"], + ["te", "http://publications.europa.eu/resource/authority/language/TEL"], + ["tg", "http://publications.europa.eu/resource/authority/language/TGK"], + ["tl", "http://publications.europa.eu/resource/authority/language/TGL"], + ["th", "http://publications.europa.eu/resource/authority/language/THA"], + ["ti", "http://publications.europa.eu/resource/authority/language/TIR"], + ["to", "http://publications.europa.eu/resource/authority/language/TON"], + ["tn", "http://publications.europa.eu/resource/authority/language/TSN"], + ["ts", "http://publications.europa.eu/resource/authority/language/TSO"], + ["tk", "http://publications.europa.eu/resource/authority/language/TUK"], + ["tr", "http://publications.europa.eu/resource/authority/language/TUR"], + ["tw", "http://publications.europa.eu/resource/authority/language/TWI"], + ["ug", 
"http://publications.europa.eu/resource/authority/language/UIG"], + ["uk", "http://publications.europa.eu/resource/authority/language/UKR"], + ["ur", "http://publications.europa.eu/resource/authority/language/URD"], + ["uz", "http://publications.europa.eu/resource/authority/language/UZB"], + ["ve", "http://publications.europa.eu/resource/authority/language/VEN"], + ["vi", "http://publications.europa.eu/resource/authority/language/VIE"], + ["vo", "http://publications.europa.eu/resource/authority/language/VOL"], + ["wa", "http://publications.europa.eu/resource/authority/language/WLN"], + ["wo", "http://publications.europa.eu/resource/authority/language/WOL"], + ["xh", "http://publications.europa.eu/resource/authority/language/XHO"], + ["yi", "http://publications.europa.eu/resource/authority/language/YID"], + ["yo", "http://publications.europa.eu/resource/authority/language/YOR"], + ["zh", "http://publications.europa.eu/resource/authority/language/ZHO"], + ["zu", "http://publications.europa.eu/resource/authority/language/ZUL"] +] \ No newline at end of file