From 004edfb855b76844b0027fd77d450aec9e38f81e Mon Sep 17 00:00:00 2001 From: Jesper Zedlitz <jesper@zedlitz.de> Date: Thu, 27 Feb 2025 07:46:33 +0100 Subject: [PATCH] Detect incorrectly marked dataset series. Some dataset series have been incorrectly assigned the MD_ScopeCode "dataset". As a heuristic datasets with a title ending with "(Serie)" will be treated as dataset series. --- .../opendata/csw2dcat/MDMetadata2Dataset.java | 13 +++++--- .../csw2dcat/MDMetadata2DatasetTests.java | 32 +++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java index 9a43c2b..9b9da61 100644 --- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java +++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java @@ -351,7 +351,7 @@ public class MDMetadata2Dataset { return responsibleParties; } - private static void addNamespaces(Element metadata) { + static void addNamespaces(Element metadata) { metadata.addNamespace("gco", "http://www.isotc211.org/2005/gco"); metadata.addNamespace("gmx", "http://www.isotc211.org/2005/gmx"); metadata.addNamespace("gmd", "http://www.isotc211.org/2005/gmd"); @@ -846,12 +846,17 @@ public class MDMetadata2Dataset { } - private boolean isDatasetSeries(Element metadata) { + static boolean isDatasetSeries(Element metadata) { final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode"); - return scopeCode != null && "series".equals(scopeCode.attributeValue("codeListValue")); + if (scopeCode != null && "series".equals(scopeCode.attributeValue("codeListValue"))) { + return true; + } + + final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); + return StringUtils.endsWith(title, "(Serie)"); } - private boolean isDataService(Element metadata) { + static 
boolean isDataService(Element metadata) { final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode"); return scopeCode != null && "service".equals(scopeCode.attributeValue("codeListValue")); } diff --git a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java index e7c2eeb..00416a5 100644 --- a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java +++ b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java @@ -1532,4 +1532,36 @@ public class MDMetadata2DatasetTests { distributionPDF.getPropertyResourceValue(DCTerms.format).getURI()); } + /** + * This is a real dataset series with the correct MD_ScopeCode + */ + @Test + public void isDatasetSeries_realSeries() throws DocumentException { + final Element input = saxReader.read(getClass().getResourceAsStream("/f7f90143-c2ad-46b2-934d-93dfd5e0f031.xml")).getRootElement(); + MDMetadata2Dataset.addNamespaces(input); + boolean result = MDMetadata2Dataset.isDatasetSeries(input); + assertTrue(result); + } + + /** + * This dataset series has been incorrectly assigned the MD_ScopeCode dataset. But the title contains "(Serie)". + */ + @Test + public void isDatasetSeries_title() throws DocumentException { + final Element input = saxReader.read(getClass().getResourceAsStream("/7b510ce5-d4d5-48d0-867b-c80778cf453c.xml")).getRootElement(); + MDMetadata2Dataset.addNamespaces(input); + boolean result = MDMetadata2Dataset.isDatasetSeries(input); + assertTrue(result); + } + + /** + * This is a dataset and not a dataset series. 
+ */ + @Test + public void isDatasetSeries_dataset() throws DocumentException { + final Element input = saxReader.read(getClass().getResourceAsStream("/1c82089a-313e-4c25-9389-0b704c885401.xml")).getRootElement(); + MDMetadata2Dataset.addNamespaces(input); + boolean result = MDMetadata2Dataset.isDatasetSeries(input); + assertFalse(result, "This is not a dataset series."); + } } -- GitLab