diff --git a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java index daababc4a235d8cdc809491d2a62d9936fb38d6e..4311cc1ca23af2dee906c2ab4fbb6490d356b96d 100644 --- a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java +++ b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java @@ -9,6 +9,8 @@ import org.apache.jena.util.ResourceUtils; import org.apache.jena.vocabulary.DCAT; import org.apache.jena.vocabulary.DCTerms; import org.apache.jena.vocabulary.RDF; +import org.springframework.beans.factory.BeanInitializationException; +import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Value; import java.io.InputStream; @@ -17,7 +19,7 @@ import java.util.*; /** * Filtert eine DCAT-AP.de konforme catalog.xml Datei nach festgelegten Kriterien. */ -public class CatalogFilter { +public class CatalogFilter implements InitializingBean { private static final Collection<Resource> UNWANTED_FORMATS = Arrays.asList( @@ -27,7 +29,9 @@ public class CatalogFilter { ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML") ); private static final Property LOCN_GEOMETRY = ResourceFactory.createProperty("http://www.w3.org/ns/locn#geometry"); - + final private Map<String, String> urlReplacements = new HashMap<>(); + @Value("#{${replaceURL:''}}") + List<String> replaceURL; @Value("${baseURL:http://localhost:8080/}") private String baseURL; @@ -63,14 +67,15 @@ public class CatalogFilter { removeUnusedLocations(model); minimizeLocations(model); rewriteHydraURLs(model); + rewriteDownloadAndAccessURLs(model); addDownloadURLs(model); return model; } /** - * Add downloadURL properties to Distributions. The German DCAT-AP.de treats downloadURL as an no so - * important optional properties and relies the the accessURL. However, the European data portal values the + * Add downloadURL properties to Distributions. The German DCAT-AP.de treats downloadURL as a not so + * important optional properties and relies on the accessURL. However, the European data portal values the * downloadURL property highly. */ void addDownloadURLs(Model model) { @@ -81,9 +86,40 @@ public class CatalogFilter { final Resource accessURL = distribution.getPropertyResourceValue(DCAT.accessURL); final Resource downloadURL = distribution.getPropertyResourceValue(DCAT.downloadURL); - if( downloadURL == null ) { + if (downloadURL == null) { distribution.addProperty(DCAT.downloadURL, accessURL); } + } + } + + private Resource replaceURIifNecessary(Resource res) { + if (res == null) return null; + final String uri = res.getURI(); + + for (String s : urlReplacements.keySet()) { + if (uri.startsWith(s)) { + return ResourceFactory.createResource(uri.replaceFirst(s, urlReplacements.get(s))); + } + } + return res; + } + + void rewriteDownloadAndAccessURLs(Model model) { + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); + while (it.hasNext()) { + final Resource distribution = it.next(); + + final Resource accessURL = replaceURIifNecessary(distribution.getPropertyResourceValue(DCAT.accessURL)); + final Resource downloadURL = replaceURIifNecessary(distribution.getPropertyResourceValue(DCAT.downloadURL)); + + if (accessURL != null) { + distribution.removeAll(DCAT.accessURL); + distribution.addProperty(DCAT.accessURL, accessURL); + } + if (downloadURL != null) { + distribution.removeAll(DCAT.downloadURL); + distribution.addProperty(DCAT.downloadURL, downloadURL); + } } } @@ -216,4 +252,17 @@ public class CatalogFilter { return atLeastOneValidFormat; } + @Override + public void afterPropertiesSet() throws Exception { + + // Interpret the (optionally) specified replacement of download URLs. + if (replaceURL.size() % 2 != 0) { + throw new BeanInitializationException("replaceURL must be an array of even size, e.g. replaceURL= {'http://10.61.35.179/','https://opendata.schleswig-holstein.de/'}"); + } + for (int i = 0; i < replaceURL.size(); i += 2) { + final String source = replaceURL.get(i); + final String target = replaceURL.get(i + 1); + urlReplacements.put(source, target); + } + } } diff --git a/src/main/java/de/landsh/opendata/catalogproxy/FilterController.java b/src/main/java/de/landsh/opendata/catalogproxy/FilterController.java index ecefbf5d4d9a988acf35c0458d77cf7bc40289dc..4b0c83ce9d8dd8acc8c0221a722e3ae4e9c3927c 100644 --- a/src/main/java/de/landsh/opendata/catalogproxy/FilterController.java +++ b/src/main/java/de/landsh/opendata/catalogproxy/FilterController.java @@ -30,16 +30,16 @@ public class FilterController { if (page == null) page = 1; - log.info("catalog.xml?page={}", page); + log.debug("catalog.xml?page={}", page); - InputStream is = new URL(remoteURL + "catalog.xml?page=" + page).openStream(); - Model model = catalogFilter.work(is); + final InputStream is = new URL(remoteURL + "catalog.xml?page=" + page).openStream(); + final Model model = catalogFilter.work(is); is.close(); response.setCharacterEncoding("utf-8"); response.setContentType("application/rdf+xml"); - Writer writer = response.getWriter(); + final Writer writer = response.getWriter(); model.write(writer); writer.close(); } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index fd9ea6c32aca5bcc9c4520cc43567099ef63c941..7cd634722ef03212b88c9262d1b083fe56df84fd 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,2 +1,4 @@ baseURL=http://localhost:8080/ remoteURL=https://opendata.schleswig-holstein.de/ +replaceURL= {'http://10.61.35.179/','https://opendata.schleswig-holstein.de/','http://134.245.19.83/','https://opendata.zitsh.de/' } + diff --git a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java index 9b570bda331eccb069bc7e92efc6eae3d039c537..e86a7aceadf8da7ea5c587945503089cad67b7bf 100644 --- a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java +++ b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java @@ -15,6 +15,8 @@ import org.junit.jupiter.api.Test; import java.io.InputStream; import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import static org.junit.jupiter.api.Assertions.*; @@ -119,4 +121,20 @@ public class CatalogFilterTest { assertNotNull(downloadURL); assertEquals("http://example.org/file.csv", downloadURL.getURI()); } + + @Test + public void rewriteDownloadAndAccessURLs() throws Exception { + catalogFilter.replaceURL = Arrays.asList("http://example.org/","https://opendata.sh/","https://www.statistik-nord.de/","https://data.sh/"); + catalogFilter.afterPropertiesSet(); + + final Model model = parseRdf(getClass().getResourceAsStream("/with_downloadURL.xml")); + catalogFilter.rewriteDownloadAndAccessURLs(model); + + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); + final Resource distribution = it.next(); + final Resource accessURL = distribution.getPropertyResourceValue(DCAT.accessURL); + final Resource downloadURL = distribution.getPropertyResourceValue(DCAT.downloadURL); + assertEquals("https://data.sh/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf", accessURL.getURI()); + assertEquals("https://opendata.sh/file.csv", downloadURL.getURI()); + } }