Skip to content
Snippets Groups Projects
Commit 60c5ff8a authored by Jesper Zedlitz's avatar Jesper Zedlitz
Browse files

Merge branch 'main' into generic-iso2dcat

parents 98702e10 2a520d9d
Branches
No related tags found
No related merge requests found
Pipeline #691 passed
Showing with 2176 additions and 40 deletions
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
<dependency> <dependency>
<groupId>ch.qos.logback</groupId> <groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId> <artifactId>logback-classic</artifactId>
<version>1.4.14</version> <version>1.5.6</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/jaxen/jaxen --> <!-- https://mvnrepository.com/artifact/jaxen/jaxen -->
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
<dependency> <dependency>
<groupId>org.junit.jupiter</groupId> <groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId> <artifactId>junit-jupiter-api</artifactId>
<version>5.10.1</version> <version>5.10.2</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
...@@ -58,20 +58,20 @@ ...@@ -58,20 +58,20 @@
<dependency> <dependency>
<groupId>org.json</groupId> <groupId>org.json</groupId>
<artifactId>json</artifactId> <artifactId>json</artifactId>
<version>20231013</version> <version>20240303</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents.client5/httpclient5 --> <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents.client5/httpclient5 -->
<dependency> <dependency>
<groupId>org.apache.httpcomponents.client5</groupId> <groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId> <artifactId>httpclient5</artifactId>
<version>5.3</version> <version>5.3.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.mockito</groupId> <groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId> <artifactId>mockito-core</artifactId>
<version>5.8.0</version> <version>5.11.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
...@@ -84,7 +84,7 @@ ...@@ -84,7 +84,7 @@
<dependency> <dependency>
<groupId>uk.org.webcompere</groupId> <groupId>uk.org.webcompere</groupId>
<artifactId>system-stubs-jupiter</artifactId> <artifactId>system-stubs-jupiter</artifactId>
<version>2.1.5</version> <version>2.1.6</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
...@@ -92,13 +92,13 @@ ...@@ -92,13 +92,13 @@
<dependency> <dependency>
<groupId>io.undertow</groupId> <groupId>io.undertow</groupId>
<artifactId>undertow-core</artifactId> <artifactId>undertow-core</artifactId>
<version>2.3.10.Final</version> <version>2.3.12.Final</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.junit.jupiter</groupId> <groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId> <artifactId>junit-jupiter-engine</artifactId>
<version>5.10.1</version> <version>5.10.2</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
......
package de.landsh.opendata.csw2dcat; package de.landsh.opendata.csw2dcat;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hc.client5.http.classic.methods.HttpPost; import org.apache.hc.client5.http.classic.methods.HttpPost;
...@@ -16,7 +17,6 @@ import org.slf4j.Logger; ...@@ -16,7 +17,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
...@@ -45,11 +45,6 @@ public class CswInterface { ...@@ -45,11 +45,6 @@ public class CswInterface {
private String[] typeInclude; private String[] typeInclude;
private CloseableHttpClient httpClient = HttpClients.createMinimal(); private CloseableHttpClient httpClient = HttpClients.createMinimal();
private String dumpCswResponse = null; private String dumpCswResponse = null;
public void setVerboseLogging(String verboseLogging) {
this.verboseLogging = verboseLogging;
}
/** /**
* Name of a directory. If set all incoming messages will be logged into files in this directory. * Name of a directory. If set all incoming messages will be logged into files in this directory.
*/ */
...@@ -75,6 +70,10 @@ public class CswInterface { ...@@ -75,6 +70,10 @@ public class CswInterface {
typeInclude = settings.typeInclude; typeInclude = settings.typeInclude;
} }
public void setVerboseLogging(String verboseLogging) {
this.verboseLogging = verboseLogging;
}
public void setTypeExclude(String[] typeExclude) { public void setTypeExclude(String[] typeExclude) {
this.typeExclude = typeExclude; this.typeExclude = typeExclude;
} }
...@@ -300,7 +299,7 @@ public class CswInterface { ...@@ -300,7 +299,7 @@ public class CswInterface {
filter.append(" <ogc:PropertyIsEqualTo>" + filter.append(" <ogc:PropertyIsEqualTo>" +
" <ogc:PropertyName>operatesOn</ogc:PropertyName>" + " <ogc:PropertyName>operatesOn</ogc:PropertyName>" +
" <ogc:Literal>") " <ogc:Literal>")
.append(id) .append(StringEscapeUtils.escapeXml11(id))
.append("</ogc:Literal>") .append("</ogc:Literal>")
.append(" </ogc:PropertyIsEqualTo>\n"); .append(" </ogc:PropertyIsEqualTo>\n");
......
...@@ -27,6 +27,8 @@ import org.slf4j.LoggerFactory; ...@@ -27,6 +27,8 @@ import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate; import java.time.LocalDate;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
...@@ -37,12 +39,14 @@ import java.util.stream.Collectors; ...@@ -37,12 +39,14 @@ import java.util.stream.Collectors;
public class MDMetadata2Dataset { public class MDMetadata2Dataset {
private static final String LANGUAGE_DE = "de"; private static final String LANGUAGE_DE = "de";
private static final String LANGUAGE_EN = "en";
private static final Logger log = LoggerFactory.getLogger(MDMetadata2Dataset.class); private static final Logger log = LoggerFactory.getLogger(MDMetadata2Dataset.class);
private static final String XPATH_OPERATION_GET_CAPABILITIES = "gmd:identificationInfo/*/srv:containsOperations/srv:SV_OperationMetadata[srv:operationName/gco:CharacterString/text()='GetCapabilities']"; private static final String XPATH_OPERATION_GET_CAPABILITIES = "gmd:identificationInfo/*/srv:containsOperations/srv:SV_OperationMetadata[srv:operationName/gco:CharacterString/text()='GetCapabilities']";
private static final Property PROPERTY_applicable_Legislation = ResourceFactory.createProperty("http://data.europa.eu/r5r/", "applicableLegislation");
static IRIFactory factory = IRIFactory.iriImplementation(); static IRIFactory factory = IRIFactory.iriImplementation();
private final Model model; private final Model model;
private final Map<String, String> keywordMapping = new HashMap<>(); private final Map<String, String> keywordMapping = new HashMap<>();
private final Map<String, JSONObject> url2license = new HashMap<>();
Map<String, String[]> themeMapping = new HashMap<>(); Map<String, String[]> themeMapping = new HashMap<>();
boolean ignoreInvalidMapping = true; boolean ignoreInvalidMapping = true;
private CswInterface cswInterface; private CswInterface cswInterface;
...@@ -77,6 +81,12 @@ public class MDMetadata2Dataset { ...@@ -77,6 +81,12 @@ public class MDMetadata2Dataset {
final String value = entry.getString(key); final String value = entry.getString(key);
keywordMapping.put(key, value); keywordMapping.put(key, value);
} }
JSONArray licenses = new JSONArray(new JSONTokener(Objects.requireNonNull(getClass().getResourceAsStream("/licenses.json"))));
for (Object l : licenses) {
JSONObject license = (JSONObject) l;
url2license.put(license.getString("uri"), license);
}
} }
static String getTextOrNull(Node node) { static String getTextOrNull(Node node) {
...@@ -501,10 +511,14 @@ public class MDMetadata2Dataset { ...@@ -501,10 +511,14 @@ public class MDMetadata2Dataset {
final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*")); final String applicationProfile = getTextOrNull(onlineResource.selectSingleNode("gmd:applicationProfile/*"));
Resource format; Resource format = null;
if (StringUtils.startsWith(applicationProfile, "http://publications.europa.eu/resource/authority/file-type/")) { if (StringUtils.startsWith(applicationProfile, "http://publications.europa.eu/resource/authority/file-type/")) {
format = ResourceFactory.createResource(applicationProfile); format = ResourceFactory.createResource(applicationProfile);
} else { } else if (StringUtils.isNotEmpty(applicationProfile)) {
format = Mapping.mapFormat(applicationProfile);
}
if (format == null) {
// Guess the format. // Guess the format.
format = guessFormat(url); format = guessFormat(url);
} }
...@@ -520,6 +534,7 @@ public class MDMetadata2Dataset { ...@@ -520,6 +534,7 @@ public class MDMetadata2Dataset {
} else if( url.contains("feed") && url.toLowerCase().contains("atom") && url.endsWith(".xml")) { } else if( url.contains("feed") && url.toLowerCase().contains("atom") && url.endsWith(".xml")) {
format = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/ATOM"); format = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/ATOM");
} }
} }
...@@ -533,18 +548,39 @@ public class MDMetadata2Dataset { ...@@ -533,18 +548,39 @@ public class MDMetadata2Dataset {
} }
private void addLicenseInformation(Resource resource, List<Node> otherLegalConstraints) { private void addLicenseInformation(Resource resource, List<Node> otherLegalConstraints) {
final JSONObject licenceInformation = findLicenseInformation(otherLegalConstraints); final JSONObject licenseInformation = findLicenseInformation(otherLegalConstraints);
if (licenceInformation != null) {
resource.addLiteral(DCATAPde.licenseAttributionByText, licenceInformation.getString("quelle")); if (licenseInformation != null) {
if (licenceInformation.has("url") && licenceInformation.getString("url").startsWith("http://dcat-ap.de/def/licenses/")) { resource.addLiteral(DCATAPde.licenseAttributionByText, licenseInformation.getString("quelle"));
// Some publishers specify the licence as the URL and not the id.
resource.addProperty(DCTerms.license, model.createResource(licenceInformation.getString("url"))); if (licenseInformation.has("url")) {
String licenseURL = licenseInformation.getString("url");
if (licenseURL.startsWith("http://dcat-ap.de/def/licenses/")) {
resource.addProperty(DCTerms.license, model.createResource(licenseURL));
} else { } else {
resource.addProperty(DCTerms.license, model.createResource("http://dcat-ap.de/def/licenses/" + licenceInformation.getString("id"))); final JSONObject license = url2license.get(licenseURL);
if (license != null && license.has("uri")) {
resource.addProperty(DCTerms.license, model.createResource(license.getString("uri")));
} else {
log.debug("Unknown license: {}", licenseURL);
handleUnknownLicense(resource, licenseInformation.getString("id"));
}
}
} else {
handleUnknownLicense(resource, licenseInformation.getString("id"));
} }
} }
} }
/**
 * Fallback for a license that could not be resolved to a known license URI: derives a URI in the
 * {@code http://dcat-ap.de/def/licenses/} namespace from the given id and attaches it to the
 * resource as {@code dct:license}.
 * <p>
 * The id is percent-encoded so that arbitrary text yields a usable URI. {@link URLEncoder} implements
 * HTML form encoding, which turns a space into {@code +}; inside a URI path a {@code +} is a literal
 * plus sign, so spaces are rewritten to {@code %20}. Slashes are restored because they are used as
 * path separators within the id.
 *
 * @param resource  resource that receives the license property
 * @param licenseId raw license id taken from the license information; must not be {@code null}
 */
private void handleUnknownLicense(Resource resource, String licenseId) {
    final String encodedId = URLEncoder.encode(licenseId, StandardCharsets.UTF_8)
            .replace("%2F", "/")   // Replace the encoded slash with the original slash
            .replace("+", "%20");  // every remaining '+' is an encoded space; a literal '+' is already %2B
    resource.addProperty(DCTerms.license, model.createResource("http://dcat-ap.de/def/licenses/" + encodedId));
    log.debug("Falling back to: {}", encodedId);
}
private Resource guessFormat(String url) { private Resource guessFormat(String url) {
final String lowerCaseURL = url.toLowerCase(); final String lowerCaseURL = url.toLowerCase();
if (lowerCaseURL.contains("service=wms")) { if (lowerCaseURL.contains("service=wms")) {
...@@ -554,9 +590,12 @@ public class MDMetadata2Dataset { ...@@ -554,9 +590,12 @@ public class MDMetadata2Dataset {
log.info("WMS with output format: {}", url); log.info("WMS with output format: {}", url);
return null; return null;
} }
} else if (lowerCaseURL.contains("service=wfs") && lowerCaseURL.contains("getcapabilities")) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/WFS_SRVC");
} else if (lowerCaseURL.contains("service=wfs")) { } else if (lowerCaseURL.contains("service=wfs")) {
if (!lowerCaseURL.contains("outputformat=")) { if (!lowerCaseURL.contains("outputformat=")) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/WFS_SRVC"); // assume that the WFS returns GML als standard output format
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/GML");
} else if (lowerCaseURL.contains("outputformat=shape-zip")) { } else if (lowerCaseURL.contains("outputformat=shape-zip")) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/SHP"); return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/SHP");
} else if (lowerCaseURL.contains("outputformat=csv")) { } else if (lowerCaseURL.contains("outputformat=csv")) {
...@@ -565,13 +604,15 @@ public class MDMetadata2Dataset { ...@@ -565,13 +604,15 @@ public class MDMetadata2Dataset {
log.info("WFS with unkown output format: {}", url); log.info("WFS with unkown output format: {}", url);
return null; return null;
} }
} else if (lowerCaseURL.endsWith(".html")) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML");
} }
return null; return null;
} }
/** /**
* Convert a {@Document} containing a gmd:MD_Metadata element into a DCAT-AP.de compliant dcat:Dataset {@link Resource}. * Convert a {@link Document} containing a gmd:MD_Metadata element into a DCAT-AP.de compliant dcat:Dataset {@link Resource}.
*/ */
public Resource convert(Document input) throws DocumentException, IOException { public Resource convert(Document input) throws DocumentException, IOException {
return convert(input.getRootElement()); return convert(input.getRootElement());
...@@ -612,13 +653,26 @@ public class MDMetadata2Dataset { ...@@ -612,13 +653,26 @@ public class MDMetadata2Dataset {
dataset.addProperty(RDF.type, DCAT.Dataset); dataset.addProperty(RDF.type, DCAT.Dataset);
if (scopeCode != null) { if (scopeCode != null) {
try {
dataset.addProperty(DCTerms.type, model.createResource(Mapping.mapScopeCode(scopeCode))); dataset.addProperty(DCTerms.type, model.createResource(Mapping.mapScopeCode(scopeCode)));
} catch (IllegalArgumentException ignore) {
log.warn("Dataset {} has an invalid scopeCode.", id);
}
} }
final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"));
if (title != null) { if (title != null) {
if (title.contains("#locale-eng:")) {
// In some source systems, German and English titles are strangely combined in one text.
String germanTitle = StringUtils.substringBefore(title, "#locale-eng:");
String englishTitle = StringUtils.substringAfter(title, "#locale-eng:");
dataset.addLiteral(DCTerms.title, model.createLiteral(germanTitle.trim(), LANGUAGE_DE));
dataset.addLiteral(DCTerms.title, model.createLiteral(englishTitle.trim(), LANGUAGE_EN));
} else {
dataset.addLiteral(DCTerms.title, model.createLiteral(title.trim(), LANGUAGE_DE)); dataset.addLiteral(DCTerms.title, model.createLiteral(title.trim(), LANGUAGE_DE));
} }
}
final String description = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:abstract/gco:CharacterString")); final String description = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:abstract/gco:CharacterString"));
if (description != null) { if (description != null) {
...@@ -643,13 +697,21 @@ public class MDMetadata2Dataset { ...@@ -643,13 +697,21 @@ public class MDMetadata2Dataset {
final Element characterSet = (Element) metadata.selectSingleNode("gmd:characterSet/gmd:MD_CharacterSetCode"); final Element characterSet = (Element) metadata.selectSingleNode("gmd:characterSet/gmd:MD_CharacterSetCode");
if (characterSet != null) { if (characterSet != null) {
try {
dataset.addLiteral(Content.characterEncoding, dataset.addLiteral(Content.characterEncoding,
model.createTypedLiteral(Mapping.mapCharacterSetCode(characterSet), XSDDatatype.XSDstring)); model.createTypedLiteral(Mapping.mapCharacterSetCode(characterSet), XSDDatatype.XSDstring));
} catch (IllegalArgumentException ignore) {
log.warn("Dataset {} has an invalid characterEncoding.", id);
}
} }
final Element maintenanceFrequencyCode = (Element) metadata.selectSingleNode("gmd:identificationInfo/*/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode"); final Element maintenanceFrequencyCode = (Element) metadata.selectSingleNode("gmd:identificationInfo/*/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode");
if (maintenanceFrequencyCode != null) { if (maintenanceFrequencyCode != null) {
try {
dataset.addProperty(DCTerms.accrualPeriodicity, model.createResource(Mapping.mapMaintenanceFrequencyCode(maintenanceFrequencyCode))); dataset.addProperty(DCTerms.accrualPeriodicity, model.createResource(Mapping.mapMaintenanceFrequencyCode(maintenanceFrequencyCode)));
} catch (IllegalArgumentException ignore) {
log.warn("Dataset {} has an invalid maintenanceFrequencyCode.", id);
}
} }
final List<Node> topicCategories = metadata.selectNodes("gmd:identificationInfo/*/gmd:topicCategory/*"); final List<Node> topicCategories = metadata.selectNodes("gmd:identificationInfo/*/gmd:topicCategory/*");
...@@ -752,14 +814,35 @@ public class MDMetadata2Dataset { ...@@ -752,14 +814,35 @@ public class MDMetadata2Dataset {
dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2010/1089/oj")); dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2010/1089/oj"));
} else if ("VERORDNUNG (EG) Nr. 976/2009 DER KOMMISSION vom 19. Oktober 2009 zur Durchführung der Richtlinie 2007/2/EG des Europäischen Parlaments und des Rates hinsichtlich der Netzdienste".equals(identifier)) { } else if ("VERORDNUNG (EG) Nr. 976/2009 DER KOMMISSION vom 19. Oktober 2009 zur Durchführung der Richtlinie 2007/2/EG des Europäischen Parlaments und des Rates hinsichtlich der Netzdienste".equals(identifier)) {
dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2009/976/oj")); dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2009/976/oj"));
} else if (StringUtils.contains(identifier, "Verordnung (EG) Nr. 1205/2008")) {
dataset.addProperty(DCTerms.conformsTo, model.createResource("http://data.europa.eu/eli/reg/2008/1205/oj"));
} }
} }
// convert INSPIRE keywords into dcatap:applicableLegislation properties // convert INSPIRE keywords into dcatap:applicableLegislation properties
if (descriptiveKeywords.contains("inspireidentifiziert") if (descriptiveKeywords.contains("inspireidentifiziert")) {
|| descriptiveKeywords.contains("INSPIRE") dataset.addProperty(DCATAP.applicableLegislation, model.createResource("http://data.europa.eu/eli/dir/2007/2/2019-06-26"));
|| descriptiveKeywords.contains("inspire")) { }
dataset.addProperty(PROPERTY_applicable_Legislation, model.createResource("http://data.europa.eu/eli/dir/2007/2/2019-06-26"));
// Create the HVD properties in accordance with the GDI-DE mapping rules of 2024-03-20
final List<Node> hvdRelevantKeywordNodes =
metadata.selectNodes("gmd:identificationInfo/*/gmd:descriptiveKeywords/gmd:MD_Keywords[" +
"gmd:thesaurusName/gmd:CI_Citation/gmd:title/gco:CharacterString = 'High-value dataset categories'" +
" or gmd:thesaurusName/gmd:CI_Citation/gmd:title/gmx:Anchor/@xlink:href = 'http://data.europa.eu/bna/asd487ae75']");
if (!hvdRelevantKeywordNodes.isEmpty()) {
dataset.addProperty(DCATAP.applicableLegislation, model.createResource("http://data.europa.eu/eli/reg_impl/2023/138/oj"));
}
for (Node keyword : hvdRelevantKeywordNodes) {
Node characterString = keyword.selectSingleNode("./gmd:keyword/gco:CharacterString/text()");
Node anchorRef = keyword.selectSingleNode("./gmd:keyword/gmx:Anchor/@xlink:href");
if (characterString != null) {
dataset.addProperty(DCATAP.hvdCategory, Mapping.mapHvdCategory(characterString.getStringValue()));
}
if (anchorRef != null) {
dataset.addProperty(DCATAP.hvdCategory, model.createResource(anchorRef.getStringValue()));
}
} }
// convert preview image // convert preview image
...@@ -781,7 +864,13 @@ public class MDMetadata2Dataset { ...@@ -781,7 +864,13 @@ public class MDMetadata2Dataset {
// add coupled services // add coupled services
if (cswInterface != null && searchId != null) { if (cswInterface != null && searchId != null) {
final List<Element> services = cswInterface.findOperatesOn(searchId); List<Element> services;
try {
services = cswInterface.findOperatesOn(searchId);
} catch (Exception e) {
log.error("Error searching for coupled services of dataset {}: {}", id, e.getMessage());
services = Collections.emptyList();
}
for (Element service : services) { for (Element service : services) {
try { try {
for (Resource dist : convertServiceToDistributions(service)) { for (Resource dist : convertServiceToDistributions(service)) {
...@@ -843,6 +932,7 @@ public class MDMetadata2Dataset { ...@@ -843,6 +932,7 @@ public class MDMetadata2Dataset {
return dataset; return dataset;
} }
private boolean isMultiFormatDataset(Element metadata) { private boolean isMultiFormatDataset(Element metadata) {
final List<Node> formatElements = metadata.selectNodes("gmd:distributionInfo/*/gmd:distributionFormat"); final List<Node> formatElements = metadata.selectNodes("gmd:distributionInfo/*/gmd:distributionFormat");
return formatElements.size() > 1; return formatElements.size() > 1;
...@@ -915,6 +1005,10 @@ public class MDMetadata2Dataset { ...@@ -915,6 +1005,10 @@ public class MDMetadata2Dataset {
} }
} }
if (formatResource == null) {
formatResource = guessFormat(url);
}
if (formatResource != null) { if (formatResource != null) {
distribution.addProperty(DCTerms.format, formatResource); distribution.addProperty(DCTerms.format, formatResource);
} else if (!ignoreInvalidMapping) { } else if (!ignoreInvalidMapping) {
......
...@@ -204,9 +204,30 @@ public class Mapping { ...@@ -204,9 +204,30 @@ public class Mapping {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/CSV"); return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/CSV");
} else if ("HTML".equals(applicationProfile)) { } else if ("HTML".equals(applicationProfile)) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML"); return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML");
} else if ("GML".equals(applicationProfile)) {
return ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/GML");
} else { } else {
log.info("{} is not an allowed applicationProfile.", applicationProfile); log.info("{} is not an allowed applicationProfile.", applicationProfile);
throw new IllegalArgumentException(applicationProfile + " is not an allowed applicationProfile."); throw new IllegalArgumentException(applicationProfile + " is not an allowed applicationProfile.");
} }
} }
/**
 * Translates the German label of a high-value dataset (HVD) category into the resource of the
 * corresponding {@code http://data.europa.eu/bna/} concept.
 *
 * @param germanName German category label; compared exactly (case-sensitive, no trimming)
 * @return resource for the matching HVD category
 * @throws IllegalArgumentException if the label is {@code null} or not one of the known categories
 */
public static Resource mapHvdCategory(String germanName) {
    String categoryUri = null;

    // A switch on a null String would raise a NullPointerException, so null is routed to the
    // "unknown label" path below instead.
    if (germanName != null) {
        switch (germanName) {
            case "Georaum":
                categoryUri = "http://data.europa.eu/bna/c_ac64a52d";
                break;
            case "Erdbeobachtung und Umwelt":
                categoryUri = "http://data.europa.eu/bna/c_dd313021";
                break;
            case "Meteorologie":
                categoryUri = "http://data.europa.eu/bna/c_164e0bf5";
                break;
            case "Statistik":
                categoryUri = "http://data.europa.eu/bna/c_e1da4e07";
                break;
            case "Unternehmen und Eigentümerschaft von Unternehmen":
                categoryUri = "http://data.europa.eu/bna/c_a9135398";
                break;
            case "Mobilität":
                categoryUri = "http://data.europa.eu/bna/c_b79e35eb";
                break;
            default:
                break;
        }
    }

    if (categoryUri == null) {
        log.info("{} is not an allowed HVD category.", germanName);
        throw new IllegalArgumentException(germanName + " is not an allowed HVD category.");
    }
    return ResourceFactory.createResource(categoryUri);
}
} }
package de.landsh.opendata.csw2dcat.vocabulary;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
/**
 * Vocabulary constants for terms in the {@code http://data.europa.eu/r5r/} namespace.
 * The class only holds static constants and cannot be instantiated.
 */
public class DCATAP {
    /** Namespace URI that every term of this vocabulary starts with. */
    public static final String NS = "http://data.europa.eu/r5r/";

    // Must be declared before the terms below: static initialisers run in textual order.
    private static final Model VOCABULARY_MODEL = ModelFactory.createDefaultModel();

    /** The namespace itself as a resource. */
    public static final Resource NAMESPACE = VOCABULARY_MODEL.createResource(NS);

    /** Property {@code applicableLegislation} in {@link #NS}. */
    public static final Property applicableLegislation = VOCABULARY_MODEL.createProperty(NS, "applicableLegislation");

    /** Property {@code hvdCategory} in {@link #NS}. */
    public static final Property hvdCategory = VOCABULARY_MODEL.createProperty(NS, "hvdCategory");

    private DCATAP() {
        // constants holder, no instances
    }

    /**
     * @return the namespace URI of this vocabulary, identical to {@link #NS}
     */
    public static String getURI() {
        return NS;
    }
}
...@@ -30,6 +30,7 @@ public class MDMetadata2DatasetTests { ...@@ -30,6 +30,7 @@ public class MDMetadata2DatasetTests {
private static final Resource FILE_TYPE_WFS_SRC = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/WFS_SRVC"); private static final Resource FILE_TYPE_WFS_SRC = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/WFS_SRVC");
private static final Resource FILE_TYPE_CSV = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/CSV"); private static final Resource FILE_TYPE_CSV = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/CSV");
private static final Resource FILE_TYPE_SHP = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/SHP"); private static final Resource FILE_TYPE_SHP = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/SHP");
private static final Resource FILE_TYPE_GML = ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/GML");
private static final Property PROPERTY_applicable_Legislation = ResourceFactory.createProperty("http://data.europa.eu/r5r/", "applicableLegislation"); private static final Property PROPERTY_applicable_Legislation = ResourceFactory.createProperty("http://data.europa.eu/r5r/", "applicableLegislation");
static Namespace NS_GMD = Namespace.get("gmd", "http://www.isotc211.org/2005/gmd"); static Namespace NS_GMD = Namespace.get("gmd", "http://www.isotc211.org/2005/gmd");
...@@ -1288,6 +1289,88 @@ public class MDMetadata2DatasetTests { ...@@ -1288,6 +1289,88 @@ public class MDMetadata2DatasetTests {
assertEquals(1, distributionHTML, "one HTML distribution (the Atom feed viewer)"); assertEquals(1, distributionHTML, "one HTML distribution (the Atom feed viewer)");
assertEquals(1, distributionATOM, "one ATOM distribution (the feed itself)"); assertEquals(1, distributionATOM, "one ATOM distribution (the feed itself)");
}
/**
 * A record with an incorrect code list value (here for MD_MaintenanceFrequencyCode) must still be
 * converted; the bad value must not abort the conversion.
 */
@Test
public void convert_incorrectMaintenanceFrequencyCode() throws DocumentException, IOException {
    settings.findWMSinInfo = true;

    final Document record = saxReader.read(
            getClass().getResourceAsStream("/bb585473-0987-436a-bc7a-132880a4df0d.xml"));
    final Resource converted = service.convert(record);

    // The conversion produced a dataset and kept the record's identifier.
    assertNotNull(converted);
    final String identifier = converted.getProperty(DCTerms.identifier).getString();
    assertEquals("bb585473-0987-436a-bc7a-132880a4df0d", identifier);
}
/**
 * One dataset offering several formats (SHP and GML) must yield one distribution per format.
 * The WMS link in the fixture is so broken that it is not detected as a WMS, hence exactly two
 * distributions are expected.
 */
@Test
public void convert_multiFormat() throws DocumentException, IOException {
    settings.findWMSinInfo = true;

    final Document record = saxReader.read(
            getClass().getResourceAsStream("/6782583C-9747-4C16-B94D-3EAF2BFDB171.xml"));
    final Map<String, Resource> distributions = collectDistributions(service.convert(record));

    assertEquals(2, distributions.size());

    // Shapefile download
    final Resource shapeDistribution = distributions.get(
            "https://opendata.schleswig-holstein.de/dataset/d501a7ab-a85e-4988-a98e-8f3ae3ab5df2/resource/1bcad7cd-9c94-4a06-bbbc-c7929bb37751/download/lsg.zip");
    assertEquals(FILE_TYPE_SHP, shapeDistribution.getProperty(DCTerms.format).getResource());
    assertEquals("Landschaftsschutzgebiete (1:25.000)", shapeDistribution.getProperty(DCTerms.title).getString());

    // WFS GetFeature request without an explicit output format
    final Resource wfsDistribution = distributions.get(
            "https://umweltgeodienste.schleswig-holstein.de/WFS_UWAT?service=wfs&version=2.0.0&request=GetFeature&typeNames=app:lsg");
    assertEquals(FILE_TYPE_GML, wfsDistribution.getProperty(DCTerms.format).getResource());
    assertEquals("WFS Landschaftsschutzgebiete (1:25.000)", wfsDistribution.getProperty(DCTerms.title).getString());
}
/**
* Specification of the HVD category as free text in characterString elements
* (in accordance with the GDI-DE mapping rules of 2024-03-20, variant a).
* <p>
* Converts the fixture {@code /hvd1.xml} and expects the dataset to carry the HVD implementing
* regulation as applicable legislation and the HVD category concept {@code c_ac64a52d}.
*/
@Test
public void convert_highValueDataset_a() throws DocumentException, IOException {
final Document inputDocument = saxReader.read(getClass().getResourceAsStream("/hvd1.xml"));
final Resource result = service.convert(inputDocument);
// NOTE(review): getPropertyResourceValue yields a single value; this presumably relies on the
// fixture producing exactly one applicableLegislation statement - confirm against hvd1.xml.
assertEquals("http://data.europa.eu/eli/reg_impl/2023/138/oj",
result.getPropertyResourceValue(DCATAP.applicableLegislation).getURI());
// c_ac64a52d is the concept Mapping.mapHvdCategory returns for the label "Georaum".
assertEquals("http://data.europa.eu/bna/c_ac64a52d",
result.getPropertyResourceValue(DCATAP.hvdCategory).getURI());
} }
/**
 * Referencing the EU vocabulary for HVD categories through gmx:Anchor elements
 * (in accordance with the GDI-DE mapping rules of 2024-03-20, variant b).
 * <p>
 * Converts the fixture {@code /hvd2.xml} and expects the HVD implementing regulation as applicable
 * legislation and the HVD category concept {@code c_ac64a52d} on the dataset.
 */
@Test
public void convert_highValueDataset_b() throws DocumentException, IOException {
    final Resource dataset = service.convert(saxReader.read(getClass().getResourceAsStream("/hvd2.xml")));

    final Resource legislation = dataset.getPropertyResourceValue(DCATAP.applicableLegislation);
    assertEquals("http://data.europa.eu/eli/reg_impl/2023/138/oj", legislation.getURI());

    final Resource category = dataset.getPropertyResourceValue(DCATAP.hvdCategory);
    assertEquals("http://data.europa.eu/bna/c_ac64a52d", category.getURI());
}
/**
 * Some source systems deliver the German and the English title combined in a single text.
 * The conversion must split them into one title literal per language.
 */
@Test
public void convert_mixedGermanEnglishTitles() throws DocumentException, IOException {
    final Document record = saxReader.read(
            getClass().getResourceAsStream("/00729c12-e689-446d-83bd-dd8cfd0eeeb8.xml"));
    final Resource dataset = service.convert(record);

    final boolean hasGermanTitle = dataset.hasProperty(DCTerms.title,
            ResourceFactory.createLangLiteral("Robben im Schleswig-Holsteinischen Wattenmeer 1995", "de"));
    assertTrue(hasGermanTitle);

    final boolean hasEnglishTitle = dataset.hasProperty(DCTerms.title,
            ResourceFactory.createLangLiteral("Seals in the Schleswig-Holstein Wadden Sea in 1995", "en"));
    assertTrue(hasEnglishTitle);
}
} }
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment