From 836661087aaf5c1c37c1d87903c6f9cbabdeee4e Mon Sep 17 00:00:00 2001
From: Jesper Zedlitz <jesper@zedlitz.de>
Date: Wed, 19 Feb 2025 08:06:49 +0100
Subject: [PATCH] prefer the application profile of the transfer option

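The metadata has evolved: the format of a transfer option can now usually be found in its gmd:applicationProfile element. Prefer that value; only when no application profile is given, fall back to the distribution-wide gmd:distributionFormat and then to guessing the format from the URL.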
---
 .../opendata/csw2dcat/MDMetadata2Dataset.java | 46 +++++++++++--------
 .../de/landsh/opendata/csw2dcat/Mapping.java  |  6 +++
 .../csw2dcat/MDMetadata2DatasetTests.java     | 24 ++++++++++
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
index dc61715..9a43c2b 100644
--- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
+++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
@@ -1054,21 +1054,30 @@ public class MDMetadata2Dataset {
         return value;
     }
 
-    void addDistribution(Resource dataset, Element onlineResource) {
-        final Model model = dataset.getModel();
-
+    private static String getGlobalFormatForAllTransferOptions(Element onlineResource) {
         final Element distributionElement = onlineResource.getParent().getParent().getParent().getParent();
-
         if (!"MD_Distribution".equals(distributionElement.getName())) {
             throw new IllegalArgumentException("Could not navigate to MD_Distribution element.");
         }
-
         final Element distributionFormat = distributionElement.element("distributionFormat");
 
+        if( distributionFormat == null)
+            return null;
+
+        return getTextOrNull(distributionFormat.selectSingleNode("gmd:MD_Format/gmd:name/gco:CharacterString"));
+    }
+
+    void addDistribution(Resource dataset, Element onlineResource) {
+        final Model model = dataset.getModel();
+
+
+        // There might be a single format specified for all transfer options.
+        final String globalFormat = getGlobalFormatForAllTransferOptions(onlineResource);
+
         final String url = repairURL(getTextOrNull(onlineResource.selectSingleNode("gmd:linkage/gmd:URL")));
-        final String format = distributionFormat == null ? null : getTextOrNull(distributionFormat.selectSingleNode("gmd:MD_Format/gmd:name/gco:CharacterString"));
         final String name = getTextOrNull(onlineResource.selectSingleNode("gmd:name/*"));
         final String description = getTextOrNull(onlineResource.selectSingleNode("gmd:description/*"));
+        final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*"));
 
         // Check if the specified URL is a valid URI and IRI.
         if (!isValidURLandIRI(url)) {
@@ -1082,22 +1091,19 @@ public class MDMetadata2Dataset {
         distribution.addProperty(DCAT.accessURL, model.createResource(url));
 
         Resource formatResource;
-        try {
-            formatResource = Mapping.mapFormat(format);
-        } catch (IllegalArgumentException e) {
-            formatResource = null;
-        }
 
-        if (format == null) {
-            // try to determine the format of the distribution based on gmd:applicationProfile
-            final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*"));
-            if (StringUtils.isNotEmpty(applicationProfile)) {
-                formatResource = Mapping.mapApplicationProfile(applicationProfile);
+        // Prefer the transfer option's own gmd:applicationProfile; only without one, use the global format or guess from the URL.
+        if (StringUtils.isNotEmpty(applicationProfile)) {
+            formatResource = Mapping.mapApplicationProfile(applicationProfile);
+        } else {
+            try {
+                formatResource = Mapping.mapFormat(globalFormat);
+            } catch (IllegalArgumentException e) {
+                formatResource = null;
+            }
+            if (formatResource == null) {
+                formatResource = guessFormat(url);
             }
-        }
-
-        if (formatResource == null) {
-            formatResource = guessFormat(url);
         }
 
         if (formatResource != null) {
diff --git a/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java b/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java
index f12fd64..96e7a52 100644
--- a/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java
+++ b/src/main/java/de/landsh/opendata/csw2dcat/Mapping.java
@@ -185,6 +185,12 @@ public class Mapping {
      * URI
      */
     public static Resource mapApplicationProfile(String applicationProfile) {
+        if(StringUtils.startsWith(applicationProfile, "http://publications.europa.eu/resource/authority/file-type/")) {
+            // The specified value is already a URI from the EU Publications Office's file-type authority list.
+            return  ResourceFactory.createResource(applicationProfile);
+        }
+
+
         if ("GeoTIFF".equals(applicationProfile)) {
             return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/GEOTIFF");
         } else if ("PDF".equals(applicationProfile)) {
diff --git a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
index be227fb..e7c2eeb 100644
--- a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
+++ b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
@@ -1508,4 +1508,28 @@ public class MDMetadata2DatasetTests {
                 distributionWMS.getPropertyResourceValue(DCTerms.format).getURI());
     }
 
+    @Test
+    public void convert_variousFormats() throws DocumentException, IOException {
+        // Converts a fixture record and checks the dcterms:format assigned to its distributions.
+        final Element input = saxReader.read(getClass().getResourceAsStream("/1c82089a-313e-4c25-9389-0b704c885401.xml")).getRootElement();
+
+        Resource dataset = service.convert(input);
+        // Exactly three distributions are expected; they are looked up by URL below.
+        final Map<String, Resource> distributionMap = collectDistributions(dataset);
+        assertEquals(3, distributionMap.size());
+
+        Resource distributionPDF = distributionMap.get("https://www.schleswig-holstein.de/mm/downloads/LFU/Geologie/GUEK250_mit_erlaeuterung.pdf");
+        Resource distributionSHP = distributionMap.get("https://umweltanwendungen.schleswig-holstein.de/data/meta/boden/geokart/guek250/GUEK250_abgabe.zip");
+        Resource distributionJPG = distributionMap.get("https://umweltanwendungen.schleswig-holstein.de/data/meta/boden/geokart/guek250/guek250.jpg");
+        // Every expected URL must be present in the map.
+        assertNotNull(distributionJPG);
+        assertNotNull(distributionSHP);
+        assertNotNull(distributionPDF);
+        // The .zip must map to SHP. NOTE(review): presumably via gmd:applicationProfile in the fixture - confirm. JPG format is not asserted.
+        assertEquals("http://publications.europa.eu/resource/authority/file-type/SHP",
+                distributionSHP.getPropertyResourceValue(DCTerms.format).getURI());
+        assertEquals("http://publications.europa.eu/resource/authority/file-type/PDF",
+                distributionPDF.getPropertyResourceValue(DCTerms.format).getURI());
+    }
+
 }
-- 
GitLab