From 43cf651f8d3a6173b77c0db40b729227ea09e73d Mon Sep 17 00:00:00 2001 From: Jesper Zedlitz <jesper@zedlitz.de> Date: Wed, 4 Dec 2024 15:18:11 +0100 Subject: [PATCH] Erzeuge zu jedem verlinkten Dienst eine DataService Resource --- .../opendata/csw2dcat/MDMetadata2Dataset.java | 169 ++++++++++++------ .../csw2dcat/MDMetadata2DatasetTests.java | 24 +++ 2 files changed, 140 insertions(+), 53 deletions(-) diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java index ba69f81..03fc7f9 100644 --- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java +++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java @@ -74,7 +74,7 @@ public class MDMetadata2Dataset { themeMapping.put("ENER", new String[]{"ENER", "utilitiesCommunication"}); themeMapping.put("INTR", new String[]{"INTR"}); - final JSONArray keywordArray = new JSONArray(new JSONTokener(getClass().getResourceAsStream("/keyword-mapping.json"))); + final JSONArray keywordArray = new JSONArray(new JSONTokener(Objects.requireNonNull(getClass().getResourceAsStream("/keyword-mapping.json")))); for (int i = 0; i < keywordArray.length(); i++) { final JSONObject entry = keywordArray.getJSONObject(i); final String key = entry.keys().next(); @@ -353,6 +353,14 @@ public class MDMetadata2Dataset { return responsibleParties; } + private static void addNamespaces(Element metadata) { + metadata.addNamespace("gco", "http://www.isotc211.org/2005/gco"); + metadata.addNamespace("gmx", "http://www.isotc211.org/2005/gmx"); + metadata.addNamespace("gmd", "http://www.isotc211.org/2005/gmd"); + metadata.addNamespace("xlink", "http://www.w3.org/1999/xlink"); + metadata.addNamespace("srv", "http://www.isotc211.org/2005/srv"); + } + public BridgeSettings getSettings() { return settings; } @@ -383,10 +391,12 @@ public class MDMetadata2Dataset { * @return dcat:Distribution resource object */ Collection<Resource> 
convertServiceToDistributions(Element metadata) { + addNamespaces(metadata); if (!"MD_Metadata".equals(metadata.getName())) { throw new IllegalArgumentException("Input must be a gmd:MD_Metadata element."); } + final String serviceId = getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")); metadata.addNamespace("gmd", "http://www.isotc211.org/2005/gmd"); if (null == metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode[@codeListValue='service']")) { @@ -398,7 +408,7 @@ public class MDMetadata2Dataset { int numberOfTransferOptions = metadata.selectNodes("gmd:distributionInfo/*/gmd:transferOptions/*/gmd:onLine/*[gmd:function/*/@codeListValue='download' and gmd:linkage/*[text()]]").size(); if (numberOfTransferOptions > 1) { log.debug("Multi-format service"); - return convertMultiFormatDataset(metadata); + return convertMultiFormatDataset(metadata, true); } final String url; @@ -419,7 +429,8 @@ public class MDMetadata2Dataset { final Resource distribution = model.createResource(url); distribution.addProperty(RDF.type, DCAT.Distribution); distribution.addProperty(DCAT.accessURL, model.createResource(url)); - distribution.addProperty(DCAT.downloadURL, model.createResource(url)); + // distribution.addProperty(DCAT.downloadURL, model.createResource(url)); + distribution.addProperty(DCAT.accessService, model.createResource(settings.baseIRI + serviceId)); final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); if (title != null) { @@ -448,8 +459,8 @@ public class MDMetadata2Dataset { return Collections.singleton(distribution); } - private Collection<Resource> convertMultiFormatDataset(Element metadata) { - final String serviceId = getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")); + private Collection<Resource> convertMultiFormatDataset(Element metadata, boolean isService) { + final String serviceId = isService ? 
getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")) : null; final List<Node> otherLegalConstraints = metadata.selectNodes("gmd:identificationInfo/*/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString"); final String dateStamp = getTextOrNull(metadata.selectSingleNode("gmd:dateStamp/gco:Date")); @@ -620,46 +631,10 @@ public class MDMetadata2Dataset { } /** - * Convert a gmd:MD_Metadata {@link Element} into a DCAT-AP.de compliant dcat:Dataset {@link Resource}. + * Convert common information used for dcat:Datasets, dcat:DataServices and dcat:DatasetSeries */ - public Resource convert(Element metadata) throws DocumentException, IOException { - - if ("GetRecordByIdResponse".equals(metadata.getName())) { - metadata = metadata.element("MD_Metadata"); - } - - if (!"MD_Metadata".equals(metadata.getName())) { - throw new IllegalArgumentException("Only a gmd:MD_Metadata element can be converted."); - } - - addNamespaces(metadata); - - final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode"); - if (scopeCode == null) { - log.warn("MD_Metadata without MD_ScopeCode."); - } else if ("service".equals(scopeCode.attributeValue("codeListValue"))) { - return null; - } - - final Resource dataset; + void convertDataResource(Element metadata, Resource dataset) { final String id = getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")); - if (id != null) { - dataset = model.createResource(settings.baseIRI + id); - dataset.addLiteral(Adms.identifier, id.trim()); - dataset.addLiteral(DCTerms.identifier, id.trim()); - } else { - dataset = model.createResource(); - } - - dataset.addProperty(RDF.type, DCAT.Dataset); - - if (scopeCode != null) { - try { - dataset.addProperty(DCTerms.type, model.createResource(Mapping.mapScopeCode(scopeCode))); - } catch (IllegalArgumentException ignore) { - log.warn("Dataset {} has an invalid scopeCode.", id); - } - } final 
String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); if (title != null) { @@ -867,6 +842,53 @@ public class MDMetadata2Dataset { // some fixed properties dataset.addProperty(DCATAPde.contributorID, model.createResource(settings.contributorId)); + } + + /** + * Convert a gmd:MD_Metadata {@link Element} into a DCAT-AP.de compliant dcat:Dataset {@link Resource}. + */ + public Resource convert(Element metadata) throws DocumentException, IOException { + + if ("GetRecordByIdResponse".equals(metadata.getName())) { + metadata = metadata.element("MD_Metadata"); + } + + if (!"MD_Metadata".equals(metadata.getName())) { + throw new IllegalArgumentException("Only a gmd:MD_Metadata element can be converted."); + } + + addNamespaces(metadata); + + final Element scopeCode = (Element) metadata.selectSingleNode("gmd:hierarchyLevel/gmd:MD_ScopeCode"); + if (scopeCode == null) { + log.warn("MD_Metadata without MD_ScopeCode."); + } else if ("service".equals(scopeCode.attributeValue("codeListValue"))) { + return null; + } + + final Resource dataset; + final String id = getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")); + if (id != null) { + dataset = model.createResource(settings.baseIRI + id); + dataset.addLiteral(Adms.identifier, id.trim()); + dataset.addLiteral(DCTerms.identifier, id.trim()); + } else { + dataset = model.createResource(); + } + + dataset.addProperty(RDF.type, DCAT.Dataset); + + if (scopeCode != null) { + try { + dataset.addProperty(DCTerms.type, model.createResource(Mapping.mapScopeCode(scopeCode))); + } catch (IllegalArgumentException ignore) { + log.warn("Dataset {} has an invalid scopeCode.", id); + } + } + + convertDataResource(metadata, dataset); + + final Element mdIdentifier = (Element) metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString"); final 
String searchId = mdIdentifier != null ? mdIdentifier.getTextTrim() : id; @@ -882,6 +904,9 @@ public class MDMetadata2Dataset { services = Collections.emptyList(); } for (Element service : services) { + + createDataService(service, model); + try { for (Resource dist : convertServiceToDistributions(service)) { dataset.addProperty(DCAT.distribution, dist); @@ -906,7 +931,7 @@ public class MDMetadata2Dataset { if (isMultiFormatDataset(metadata)) { // This is a multi-format dataset. log.debug("Multi-format dataset."); - for (Resource distribution : convertMultiFormatDataset(metadata)) { + for (Resource distribution : convertMultiFormatDataset(metadata, false)) { dataset.addProperty(DCAT.distribution, distribution); } @@ -917,6 +942,9 @@ public class MDMetadata2Dataset { } } + final List<Node> otherLegalConstraints = metadata.selectNodes("gmd:identificationInfo/*/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString"); + final String dateStamp = getTextOrNull(metadata.element("dateStamp")); + final List<Node> onlineResourceInformation = metadata.selectNodes("gmd:distributionInfo/*/gmd:transferOptions/*/gmd:onLine/*[gmd:function/*/@codeListValue='information' and gmd:linkage/*[text()]]"); for (Node node : onlineResourceInformation) { final Element linkage = ((Element) node).element("linkage"); @@ -925,7 +953,7 @@ public class MDMetadata2Dataset { if (settings.findWMSinInfo && StringUtils.containsIgnoreCase(url, "service=WMS") && StringUtils.containsIgnoreCase(url, "request=GetCapabilities")) { - Resource distribution = convertOnlineResourceToDistribution(node, otherLegalConstraints, getTextOrNull(dateStamp), null); + Resource distribution = convertOnlineResourceToDistribution(node, otherLegalConstraints, dateStamp, null); if (distribution != null) dataset.addProperty(DCAT.distribution, distribution); } else { @@ -948,6 +976,49 @@ public class MDMetadata2Dataset { return dataset; } + /** + * Create a dcat:DataService resource for a 
service. + */ + Resource createDataService(Element metadata, Model model) { + final Resource dataService; + addNamespaces(metadata); + final String id = getTextOrNull(metadata.selectSingleNode("gmd:fileIdentifier/gco:CharacterString")); + if (id != null) { + dataService = model.createResource(settings.baseIRI + id); + dataService.addLiteral(Adms.identifier, id.trim()); + dataService.addLiteral(DCTerms.identifier, id.trim()); + } else { + dataService = model.createResource(); + } + + dataService.addProperty(RDF.type, DCAT.DataService); + + convertDataResource(metadata, dataService); + + String url; + // Is it a service with a comprehensive description of operations? + if (metadata.selectSingleNode(XPATH_OPERATION_GET_CAPABILITIES) != null) { + final Element operationMetadata = (Element) metadata.selectSingleNode(XPATH_OPERATION_GET_CAPABILITIES); + url = repairURL(StringUtils.trim(operationMetadata.selectSingleNode("srv:connectPoint/gmd:CI_OnlineResource/gmd:linkage/gmd:URL").getStringValue())); + } else { + url = repairURL(getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo[1]/*/gmd:citation/gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code"))); + } + + if (StringUtils.contains(StringUtils.lowerCase(url), "request=getcapabilities")) { + // the GetCapabilities request is linked + dataService.addProperty(DCAT.endpointDescription, model.createResource(url)); + dataService.addProperty(DCAT.endpointURL, model.createResource(StringUtils.substringBefore(url, "?"))); + } else { + dataService.addProperty(DCAT.endpointURL, model.createResource(url)); + } + + List<Node> linkedDatasets = metadata.selectNodes("gmd:identificationInfo[1]/*/srv:operatesOn/@xlink:href"); + for (Node linkedDataset : linkedDatasets) { + dataService.addProperty(DCAT.servesDataset, model.createResource(StringUtils.trim(linkedDataset.getText()))); + } + + return dataService; + } private boolean isMultiFormatDataset(Element metadata) { final List<Node> formatElements = 
metadata.selectNodes("gmd:distributionInfo/*/gmd:distributionFormat"); @@ -1049,14 +1120,6 @@ public class MDMetadata2Dataset { dataset.addProperty(DCAT.distribution, distribution); } - private void addNamespaces(Element metadata) { - metadata.addNamespace("gco", "http://www.isotc211.org/2005/gco"); - metadata.addNamespace("gmx", "http://www.isotc211.org/2005/gmx"); - metadata.addNamespace("gmd", "http://www.isotc211.org/2005/gmd"); - metadata.addNamespace("xlink", "http://www.w3.org/1999/xlink"); - metadata.addNamespace("srv", "http://www.isotc211.org/2005/srv"); - } - /** * Iterate through the content of gmd:otherConstraints to find license information in JSON * diff --git a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java index f57d479..51e0f95 100644 --- a/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java +++ b/src/test/java/de/landsh/opendata/csw2dcat/MDMetadata2DatasetTests.java @@ -1402,4 +1402,28 @@ public class MDMetadata2DatasetTests { assertNull(result); } + + @Test + public void createDataService() throws DocumentException { + final Document inputDocument = saxReader.read(getClass().getResourceAsStream("/service1.xml")); + + final Resource result = service.createDataService(inputDocument.getRootElement(), ModelFactory.createDefaultModel()); + + assertNotNull(result); + assertTrue(result.hasProperty(RDF.type, DCAT.DataService)); + assertEquals("Preußische Landesaufnahme (Chronologen, Darstellungsdienst, Schleswig-Holstein) - kostenfrei", + result.getRequiredProperty(DCTerms.title).getLiteral().getString()); + assertEquals("Dieser Darstellungsdienst stellt die Chronologen der Preußischen Landesaufnahme dar. Die historischen Karten sind kulturhistorische Zeitzeugen der Kartographie und spiegeln im Besonderen die politische Machtverteilung wider. 
Die Preußische Landesaufnahme wurde in den Jahren 1878 bis 1880 aufgenommen und zeigt in den einzelnen Messtischblättern die zu dieser Zeit vorherrschende Topographie. Der Kartenmaßstab der einzelnen Messtischblätter ist 1:25.000 und deckt sich im Blattschnitt mit denen der Topographischen Karte 1:25.000 (TK25). Zusätzlich sind unter dem Begriff \"Chronologen\" Kartenblätter erhältlich, die über die Erstausgabe 1878/80 hinaus als Reproduktionen weitere Ausgaben des Topographischen Kartenwerks 1:25.000 (TK25) bis 1950 beinhalten.", + result.getRequiredProperty(DCTerms.description).getLiteral().getString()); + + assertTrue(result.hasProperty(DCTerms.license, ResourceFactory.createResource("http://dcat-ap.de/def/licenses/dl-by-de/2.0"))); + assertTrue(result.hasLiteral(DCATAPde.licenseAttributionByText, "© GDI-SH")); + assertEquals("2022-05-31", result.getProperty(DCTerms.modified).getLiteral().getString()); + assertEquals("https://service.gdi-sh.de/WMS_SH_FD_Chronologen_KF?Service=wms&version=1.3.0&request=GetCapabilities", + result.getProperty(DCAT.endpointDescription).getResource().getURI()); + assertEquals("https://service.gdi-sh.de/WMS_SH_FD_Chronologen_KF", + result.getProperty(DCAT.endpointURL).getResource().getURI()); + assertTrue(result.hasProperty(DCAT.servesDataset, ResourceFactory.createResource("https://registry.gdi-de.org/id/de.sh/7164d86e-8c55-4e08-ab1f-601b3002d1d1"))); + } + } -- GitLab