From 004edfb855b76844b0027fd77d450aec9e38f81e Mon Sep 17 00:00:00 2001
From: Jesper Zedlitz <jesper@zedlitz.de>
Date: Thu, 27 Feb 2025 07:46:33 +0100
Subject: [PATCH] Detect incorrectly marked dataset series.

Some dataset series have been incorrectly assigned the MD_ScopeCode "dataset". As a heuristic, datasets
whose title ends with "(Serie)" are treated as dataset series.
---
 .../opendata/csw2dcat/MDMetadata2Dataset.java | 13 +++++---
 .../csw2dcat/MDMetadata2DatasetTests.java     | 32 +++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
index 9a43c2b..9b9da61 100644
--- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
+++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java
@@ -351,7 +351,7 @@ public class MDMetadata2Dataset {
         return responsibleParties;
     }
 
-    private static void addNamespaces(Element metadata) {
+    static void addNamespaces(Element metadata) {
         metadata.addNamespace("gco", "http://www.isotc211.org/2005/gco");
         metadata.addNamespace("gmx", "http://www.isotc211.org/2005/gmx");
         metadata.addNamespace("gmd", "http://www.isotc211.org/2005/gmd");
@@ -846,12 +846,17 @@ public class MDMetadata2Dataset {
 
     }
 
-    private boolean isDatasetSeries(Element metadata) {
+    static boolean  isDatasetSeries(Element metadata) {
         final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode");
-        return scopeCode != null && "series".equals(scopeCode.attributeValue("codeListValue"));
+        if (scopeCode != null && "series".equals(scopeCode.attributeValue("codeListValue"))) {
+            return true;
+        }
+
+        final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"));
+        return StringUtils.endsWith(title, "(Serie)");
     }
 
-    private boolean isDataService(Element metadata) {
+    static boolean isDataService(Element metadata) {
         final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode");
         return scopeCode != null && "service".equals(scopeCode.attributeValue("codeListValue"));
     }
diff --git a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
index e7c2eeb..00416a5 100644
--- a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
+++ b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java
@@ -1532,4 +1532,36 @@ public class MDMetadata2DatasetTests {
                 distributionPDF.getPropertyResourceValue(DCTerms.format).getURI());
     }
 
+    /**
+     * This is a real dataset series with the correct MD_ScopeCode.
+     */
+    @Test
+    public void isDatasetSeries_realSeries() throws DocumentException {
+        final Element input = saxReader.read(getClass().getResourceAsStream("/f7f90143-c2ad-46b2-934d-93dfd5e0f031.xml")).getRootElement();
+        MDMetadata2Dataset.addNamespaces(input);
+        boolean result = MDMetadata2Dataset.isDatasetSeries(input);
+        assertTrue(result);
+    }
+
+    /**
+     * This dataset series has been incorrectly assigned the MD_ScopeCode "dataset", but its title ends with "(Serie)".
+     */
+    @Test
+    public void isDatasetSeries_title() throws DocumentException {
+        final Element input = saxReader.read(getClass().getResourceAsStream("/7b510ce5-d4d5-48d0-867b-c80778cf453c.xml")).getRootElement();
+        MDMetadata2Dataset.addNamespaces(input);
+        boolean result = MDMetadata2Dataset.isDatasetSeries(input);
+        assertTrue(result);
+    }
+
+    /**
+     * This is a dataset and not a dataset series.
+     */
+    @Test
+    public void isDatasetSeries_dataset() throws DocumentException {
+        final Element input = saxReader.read(getClass().getResourceAsStream("/1c82089a-313e-4c25-9389-0b704c885401.xml")).getRootElement();
+        MDMetadata2Dataset.addNamespaces(input);
+        boolean result = MDMetadata2Dataset.isDatasetSeries(input);
+        assertFalse(result, "This is not a dataset series.");
+    }
 }
-- 
GitLab