From 836661087aaf5c1c37c1d87903c6f9cbabdeee4e Mon Sep 17 00:00:00 2001 From: Jesper Zedlitz <jesper@zedlitz.de> Date: Wed, 19 Feb 2025 08:06:49 +0100 Subject: [PATCH] prefer the application profile of the transfer option The metadata has evolved. Now the format of a transfer option can usually be found in its gmd:applicationProfile element. --- .../opendata/csw2dcat/MDMetadata2Dataset.java | 46 +++++++++++-------- .../de/landsh/opendata/csw2dcat/Mapping.java | 6 +++ .../csw2dcat/MDMetadata2DatasetTests.java | 24 ++++++++++ 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java index dc61715..9a43c2b 100644 --- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java +++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java @@ -1054,21 +1054,30 @@ public class MDMetadata2Dataset { return value; } - void addDistribution(Resource dataset, Element onlineResource) { - final Model model = dataset.getModel(); - + private static String getGlobalFormatForAllTransferOptions(Element onlineResource) { final Element distributionElement = onlineResource.getParent().getParent().getParent().getParent(); - if (!"MD_Distribution".equals(distributionElement.getName())) { throw new IllegalArgumentException("Could not navigate to MD_Distribution element."); } - final Element distributionFormat = distributionElement.element("distributionFormat"); + if( distributionFormat == null) + return null; + + return getTextOrNull(distributionFormat.selectSingleNode("gmd:MD_Format/gmd:name/gco:CharacterString")); + } + + void addDistribution(Resource dataset, Element onlineResource) { + final Model model = dataset.getModel(); + + + // there might be a single format specified for all transfer options + final String globalFormat = getGlobalFormatForAllTransferOptions(onlineResource); + final String url = 
repairURL(getTextOrNull(onlineResource.selectSingleNode("gmd:linkage/gmd:URL"))); - final String format = distributionFormat == null ? null : getTextOrNull(distributionFormat.selectSingleNode("gmd:MD_Format/gmd:name/gco:CharacterString")); final String name = getTextOrNull(onlineResource.selectSingleNode("gmd:name/*")); final String description = getTextOrNull(onlineResource.selectSingleNode("gmd:description/*")); + final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*")); // Check if the specified URL is a valid URI and IRI. if (!isValidURLandIRI(url)) { @@ -1082,22 +1091,19 @@ public class MDMetadata2Dataset { distribution.addProperty(DCAT.accessURL, model.createResource(url)); Resource formatResource; - try { - formatResource = Mapping.mapFormat(format); - } catch (IllegalArgumentException e) { - formatResource = null; - } - if (format == null) { - // try to determine the format of the distribution based on gmd:applicationProfile - final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*")); - if (StringUtils.isNotEmpty(applicationProfile)) { - formatResource = Mapping.mapApplicationProfile(applicationProfile); + // Now it seems to be best to evaluate the transfer options' gmd:applicationProfile first. 
+ if (StringUtils.isNotEmpty(applicationProfile)) { + formatResource = Mapping.mapApplicationProfile(applicationProfile); + } else { + try { + formatResource = Mapping.mapFormat(globalFormat); + } catch (IllegalArgumentException e) { + formatResource = null; + } + if (formatResource == null) { + formatResource = guessFormat(url); } - } - - if (formatResource == null) { - formatResource = guessFormat(url); } if (formatResource != null) { diff --git a/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java b/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java index f12fd64..96e7a52 100644 --- a/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java +++ b/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java @@ -185,6 +185,12 @@ public class Mapping { * URI */ public static Resource mapApplicationProfile(String applicationProfile) { + if(StringUtils.startsWith(applicationProfile, "http://publications.europa.eu/resource/authority/file-type/")) { + // The specified value is already a URI from the EU Publications Office's file-type authority list + return ResourceFactory.createResource(applicationProfile); + } + + if ("GeoTIFF".equals(applicationProfile)) { return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/GEOTIFF"); } else if ("PDF".equals(applicationProfile)) { diff --git a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java index be227fb..e7c2eeb 100644 --- a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java +++ b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java @@ -1508,4 +1508,28 @@ public class MDMetadata2DatasetTests { distributionWMS.getPropertyResourceValue(DCTerms.format).getURI()); } + @Test + public void convert_variousFormats() throws DocumentException, IOException { + + final Element input = 
saxReader.read(getClass().getResourceAsStream("/1c82089a-313e-4c25-9389-0b704c885401.xml")).getRootElement(); + + Resource dataset = service.convert(input); + + final Map<String, Resource> distributionMap = collectDistributions(dataset); + assertEquals(3, distributionMap.size()); + + Resource distributionPDF = distributionMap.get("https://www.schleswig-holstein.de/mm/downloads/LFU/Geologie/GUEK250_mit_erlaeuterung.pdf"); + Resource distributionSHP = distributionMap.get("https://umweltanwendungen.schleswig-holstein.de/data/meta/boden/geokart/guek250/GUEK250_abgabe.zip"); + Resource distributionJPG = distributionMap.get("https://umweltanwendungen.schleswig-holstein.de/data/meta/boden/geokart/guek250/guek250.jpg"); + + assertNotNull(distributionJPG); + assertNotNull(distributionSHP); + assertNotNull(distributionPDF); + + assertEquals("http://publications.europa.eu/resource/authority/file-type/SHP", + distributionSHP.getPropertyResourceValue(DCTerms.format).getURI()); + assertEquals("http://publications.europa.eu/resource/authority/file-type/PDF", + distributionPDF.getPropertyResourceValue(DCTerms.format).getURI()); + } + } -- GitLab