diff --git a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java index 4ade8042716974a1dc2d855a7c0f7d4840cb506c..fb1eb31f94dd8760046216c97b576d2b4ec8a562 100644 --- a/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java +++ b/src/main/java/de/landsh/opendata/csw2dcat/MDMetadata2Dataset.java @@ -25,10 +25,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.util.*; import java.util.stream.Collectors; @@ -46,11 +46,11 @@ public class MDMetadata2Dataset { static IRIFactory factory = IRIFactory.iriImplementation(); private final Model model; private final Map<String, String> keywordMapping = new HashMap<>(); + private final Map<String, JSONObject> url2license = new HashMap<>(); Map<String, String[]> themeMapping = new HashMap<>(); boolean ignoreInvalidMapping = true; private CswInterface cswInterface; private BridgeSettings settings; - private final Map<String, JSONObject> url2license = new HashMap<>(); public MDMetadata2Dataset(Model model, CswInterface cswInterface, BridgeSettings bridgeSettings) { this(model); @@ -513,11 +513,11 @@ public class MDMetadata2Dataset { Resource format = null; if (StringUtils.startsWith(applicationProfile, "http://publications.europa.eu/resource/authority/file-type/")) { format = ResourceFactory.createResource(applicationProfile); - } else if( StringUtils.isNotEmpty(applicationProfile)) { + } else if (StringUtils.isNotEmpty(applicationProfile)) { format = Mapping.mapFormat(applicationProfile); } - if( format== null) { + if (format == null) { // Guess the format. format = guessFormat(url); } @@ -530,7 +530,7 @@ public class MDMetadata2Dataset { format = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/ATOM"); } else if ("AtomFeed Viewer".equals(name)) { format = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML"); - } else if( url.contains("feed") && url.toLowerCase().contains("atom") && url.endsWith(".xml")) { + } else if (url.contains("feed") && url.toLowerCase().contains("atom") && url.endsWith(".xml")) { format = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/ATOM"); } @@ -548,22 +548,22 @@ public class MDMetadata2Dataset { private void addLicenseInformation(Resource resource, List<Node> otherLegalConstraints) { final JSONObject licenseInformation = findLicenseInformation(otherLegalConstraints); - + if (licenseInformation != null) { resource.addLiteral(DCATAPde.licenseAttributionByText, licenseInformation.getString("quelle")); - + if (licenseInformation.has("url")) { String licenseURL = licenseInformation.getString("url"); - + if (licenseURL.startsWith("http://dcat-ap.de/def/licenses/")) { resource.addProperty(DCTerms.license, model.createResource(licenseURL)); } else { final JSONObject license = url2license.get(licenseURL); - + if (license != null && license.has("uri")) { resource.addProperty(DCTerms.license, model.createResource(license.getString("uri"))); } else { - log.info("Unknown license: {}", licenseURL); + log.debug("Unknown license: {}", licenseURL); handleUnknownLicense(resource, licenseInformation.getString("id")); } } @@ -572,18 +572,14 @@ public class MDMetadata2Dataset { } } } - + private void handleUnknownLicense(Resource resource, String licenseId) { - try { - licenseId = URLEncoder.encode(licenseId, "UTF-8") + licenseId = URLEncoder.encode(licenseId, StandardCharsets.UTF_8) .replaceAll("%2F", "/"); // Replace the encoded slash with the original slash - resource.addProperty(DCTerms.license, model.createResource("http://dcat-ap.de/def/licenses/" + licenseId)); - log.info("Falling back to: {}", licenseId); - } catch (UnsupportedEncodingException e) { - log.info("Unable to encode licenseId: {}", e); - } + resource.addProperty(DCTerms.license, model.createResource("http://dcat-ap.de/def/licenses/" + licenseId)); + log.debug("Falling back to: {}", licenseId); } - + private Resource guessFormat(String url) { final String lowerCaseURL = url.toLowerCase(); if (lowerCaseURL.contains("service=wms")) { @@ -593,7 +589,7 @@ public class MDMetadata2Dataset { log.info("WMS with output format: {}", url); return null; } - } else if (lowerCaseURL.contains("service=wfs") && lowerCaseURL.contains("getcapabilities")) { + } else if (lowerCaseURL.contains("service=wfs") && lowerCaseURL.contains("getcapabilities")) { return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/WFS_SRVC"); } else if (lowerCaseURL.contains("service=wfs")) { if (!lowerCaseURL.contains("outputformat=")) { @@ -666,13 +662,13 @@ public class MDMetadata2Dataset { final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); if (title != null) { - if( title.contains("#locale-eng:")) { + if (title.contains("#locale-eng:")) { // In some source systems, German and English titles are strangely combined in one text. String germanTitle = StringUtils.substringBefore(title, "#locale-eng:"); String englishTitle = StringUtils.substringAfter(title, "#locale-eng:"); dataset.addLiteral(DCTerms.title, model.createLiteral(germanTitle.trim(), LANGUAGE_DE)); dataset.addLiteral(DCTerms.title, model.createLiteral(englishTitle.trim(), LANGUAGE_EN)); - } else { + } else { dataset.addLiteral(DCTerms.title, model.createLiteral(title.trim(), LANGUAGE_DE)); } } @@ -817,7 +813,7 @@ public class MDMetadata2Dataset { dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2010/1089/oj")); } else if ("VERORDNUNG (EG) Nr. 976/2009 DER KOMMISSION vom 19. Oktober 2009 zur Durchführung der Richtlinie 2007/2/EG des Europäischen Parlaments und des Rates hinsichtlich der Netzdienste".equals(identifier)) { dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2009/976/oj")); - } else if ( StringUtils.contains(identifier, "Verordnung (EG) Nr. 1205/2008")) { + } else if (StringUtils.contains(identifier, "Verordnung (EG) Nr. 1205/2008")) { dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2008/1205/oj")); } } @@ -832,19 +828,19 @@ public class MDMetadata2Dataset { metadata.selectNodes("gmd:identificationInfo/*/gmd:descriptiveKeywords/gmd:MD_Keywords[" + "gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString = 'High-value dataset categories'" + " or gmd:thesaurusName/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href = 'http://data.europa.eu/bna/asd487ae75']"); - if( !hvdRelevantKeywordNodes.isEmpty()) { + if (!hvdRelevantKeywordNodes.isEmpty()) { dataset.addProperty(DCATAP.applicableLegislation, model.createResource("http://data.europa.eu/eli/reg_impl/2023/138/oj")); } for (Node keyword : hvdRelevantKeywordNodes) { Node characterString = keyword.selectSingleNode("./gmd:keyword/gco:CharacterString/text()"); Node anchorRef = keyword.selectSingleNode("./gmd:keyword/gmx:Anchor/@xlink:href"); - if( characterString != null) { - dataset.addProperty(DCATAP.hvdCategory, Mapping.mapHvdCategory(characterString.getStringValue())); + if (characterString != null) { + dataset.addProperty(DCATAP.hvdCategory, Mapping.mapHvdCategory(characterString.getStringValue())); } - if( anchorRef != null) { - dataset.addProperty(DCATAP.hvdCategory, model.createResource( anchorRef.getStringValue())); + if (anchorRef != null) { + dataset.addProperty(DCATAP.hvdCategory, model.createResource(anchorRef.getStringValue())); } } @@ -867,7 +863,7 @@ public class MDMetadata2Dataset { // add coupled services if (cswInterface != null && searchId != null) { - List<Element> services ; + List<Element> services; try { services = cswInterface.findOperatesOn(searchId); } catch (Exception e) { @@ -936,7 +932,6 @@ public class MDMetadata2Dataset { } - private boolean isMultiFormatDataset(Element metadata) { final List<Node> formatElements = metadata.selectNodes("gmd:distributionInfo/*/gmd:distributionFormat"); return formatElements.size() > 1;