Skip to content
Snippets Groups Projects
Commit 3150e512 authored by Jesper Zedlitz's avatar Jesper Zedlitz
Browse files

Möglichkeit zum Ersetzen von downloadURL und accessURL eingebaut

parent 7669ba32
Branches
No related tags found
No related merge requests found
...@@ -9,6 +9,8 @@ import org.apache.jena.util.ResourceUtils; ...@@ -9,6 +9,8 @@ import org.apache.jena.util.ResourceUtils;
import org.apache.jena.vocabulary.DCAT; import org.apache.jena.vocabulary.DCAT;
import org.apache.jena.vocabulary.DCTerms; import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.RDF;
import org.springframework.beans.factory.BeanInitializationException;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import java.io.InputStream; import java.io.InputStream;
...@@ -17,7 +19,7 @@ import java.util.*; ...@@ -17,7 +19,7 @@ import java.util.*;
/** /**
* Filtert eine DCAT-AP.de konforme catalog.xml Datei nach festgelegten Kriterien. * Filtert eine DCAT-AP.de konforme catalog.xml Datei nach festgelegten Kriterien.
*/ */
public class CatalogFilter { public class CatalogFilter implements InitializingBean {
private static final Collection<Resource> UNWANTED_FORMATS = Arrays.asList( private static final Collection<Resource> UNWANTED_FORMATS = Arrays.asList(
...@@ -27,7 +29,9 @@ public class CatalogFilter { ...@@ -27,7 +29,9 @@ public class CatalogFilter {
ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML") ResourceFactory.createResource("http://publications.europa.eu/resource/authority/file-type/HTML")
); );
private static final Property LOCN_GEOMETRY = ResourceFactory.createProperty("http://www.w3.org/ns/locn#geometry"); private static final Property LOCN_GEOMETRY = ResourceFactory.createProperty("http://www.w3.org/ns/locn#geometry");
final private Map<String, String> urlReplacements = new HashMap<>();
@Value("#{${replaceURL:''}}")
List<String> replaceURL;
@Value("${baseURL:http://localhost:8080/}") @Value("${baseURL:http://localhost:8080/}")
private String baseURL; private String baseURL;
...@@ -63,14 +67,15 @@ public class CatalogFilter { ...@@ -63,14 +67,15 @@ public class CatalogFilter {
removeUnusedLocations(model); removeUnusedLocations(model);
minimizeLocations(model); minimizeLocations(model);
rewriteHydraURLs(model); rewriteHydraURLs(model);
rewriteDownloadAndAccessURLs(model);
addDownloadURLs(model); addDownloadURLs(model);
return model; return model;
} }
/** /**
* Add downloadURL properties to Distributions. The German DCAT-AP.de treats downloadURL as an no so * Add downloadURL properties to Distributions. The German DCAT-AP.de treats downloadURL as a not so
* important optional properties and relies the the accessURL. However, the European data portal values the * important optional properties and relies on the accessURL. However, the European data portal values the
* downloadURL property highly. * downloadURL property highly.
*/ */
void addDownloadURLs(Model model) { void addDownloadURLs(Model model) {
...@@ -84,6 +89,37 @@ public class CatalogFilter { ...@@ -84,6 +89,37 @@ public class CatalogFilter {
if (downloadURL == null) { if (downloadURL == null) {
distribution.addProperty(DCAT.downloadURL, accessURL); distribution.addProperty(DCAT.downloadURL, accessURL);
} }
}
}
/**
 * Applies the configured URL prefix replacements to a resource.
 *
 * @param res the resource to inspect; may be {@code null}
 * @return {@code null} if {@code res} is {@code null}; a new resource with the replaced prefix if the URI of
 * {@code res} starts with one of the keys of {@code urlReplacements}; otherwise {@code res} itself
 */
private Resource replaceURIifNecessary(Resource res) {
    if (res == null) {
        return null;
    }
    final String uri = res.getURI();
    if (uri == null) {
        // Without a URI there is nothing to compare against the configured prefixes
        // (Jena documents getURI() as returning null for anonymous resources).
        return res;
    }
    for (Map.Entry<String, String> replacement : urlReplacements.entrySet()) {
        final String sourcePrefix = replacement.getKey();
        if (uri.startsWith(sourcePrefix)) {
            // Plain concatenation instead of String.replaceFirst: the prefix is a literal URL, not a regular
            // expression, and the target must not be interpreted as a replacement pattern ('$', '\').
            return ResourceFactory.createResource(replacement.getValue() + uri.substring(sourcePrefix.length()));
        }
    }
    return res;
}
void rewriteDownloadAndAccessURLs(Model model) {
final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
while (it.hasNext()) {
final Resource distribution = it.next();
final Resource accessURL = replaceURIifNecessary(distribution.getPropertyResourceValue(DCAT.accessURL));
final Resource downloadURL = replaceURIifNecessary(distribution.getPropertyResourceValue(DCAT.downloadURL));
if (accessURL != null) {
distribution.removeAll(DCAT.accessURL);
distribution.addProperty(DCAT.accessURL, accessURL);
}
if (downloadURL != null) {
distribution.removeAll(DCAT.downloadURL);
distribution.addProperty(DCAT.downloadURL, downloadURL);
}
} }
} }
...@@ -216,4 +252,17 @@ public class CatalogFilter { ...@@ -216,4 +252,17 @@ public class CatalogFilter {
return atLeastOneValidFormat; return atLeastOneValidFormat;
} }
/**
 * Interprets the (optionally) specified {@code replaceURL} list as consecutive (source prefix, target prefix)
 * pairs and stores them in {@code urlReplacements}.
 *
 * @throws BeanInitializationException if {@code replaceURL} has an odd number of elements
 */
@Override
public void afterPropertiesSet() throws Exception {
    if (replaceURL == null) {
        // Nothing configured (e.g. the field was never assigned outside of Spring): no replacements.
        return;
    }
    if (replaceURL.size() % 2 != 0) {
        throw new BeanInitializationException("replaceURL must be an array of even size, e.g. replaceURL= {'http://10.61.35.179/','https://opendata.schleswig-holstein.de/'}");
    }
    // Even index: source prefix, following odd index: its target prefix.
    for (int i = 0; i < replaceURL.size(); i += 2) {
        final String source = replaceURL.get(i);
        final String target = replaceURL.get(i + 1);
        urlReplacements.put(source, target);
    }
}
} }
...@@ -30,16 +30,16 @@ public class FilterController { ...@@ -30,16 +30,16 @@ public class FilterController {
if (page == null) if (page == null)
page = 1; page = 1;
log.info("catalog.xml?page={}", page); log.debug("catalog.xml?page={}", page);
InputStream is = new URL(remoteURL + "catalog.xml?page=" + page).openStream(); final InputStream is = new URL(remoteURL + "catalog.xml?page=" + page).openStream();
Model model = catalogFilter.work(is); final Model model = catalogFilter.work(is);
is.close(); is.close();
response.setCharacterEncoding("utf-8"); response.setCharacterEncoding("utf-8");
response.setContentType("application/rdf+xml"); response.setContentType("application/rdf+xml");
Writer writer = response.getWriter(); final Writer writer = response.getWriter();
model.write(writer); model.write(writer);
writer.close(); writer.close();
} }
......
baseURL=http://localhost:8080/ baseURL=http://localhost:8080/
remoteURL=https://opendata.schleswig-holstein.de/ remoteURL=https://opendata.schleswig-holstein.de/
replaceURL= {'http://10.61.35.179/','https://opendata.schleswig-holstein.de/','http://134.245.19.83/','https://opendata.zitsh.de/' }
...@@ -15,6 +15,8 @@ import org.junit.jupiter.api.Test; ...@@ -15,6 +15,8 @@ import org.junit.jupiter.api.Test;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
...@@ -119,4 +121,20 @@ public class CatalogFilterTest { ...@@ -119,4 +121,20 @@ public class CatalogFilterTest {
assertNotNull(downloadURL); assertNotNull(downloadURL);
assertEquals("http://example.org/file.csv", downloadURL.getURI()); assertEquals("http://example.org/file.csv", downloadURL.getURI());
} }
/**
 * Configures two prefix replacements and checks that both the accessURL and the downloadURL of the
 * distribution in the fixture are rewritten accordingly.
 */
@Test
public void rewriteDownloadAndAccessURLs() throws Exception {
    catalogFilter.replaceURL = Arrays.asList("http://example.org/","https://opendata.sh/","https://www.statistik-nord.de/","https://data.sh/");
    catalogFilter.afterPropertiesSet();

    final Model model = parseRdf(getClass().getResourceAsStream("/with_downloadURL.xml"));

    catalogFilter.rewriteDownloadAndAccessURLs(model);

    final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
    // Fail with a clear assertion instead of a NoSuchElementException if the fixture has no distribution.
    assertTrue(it.hasNext());
    final Resource distribution = it.next();
    final Resource accessURL = distribution.getPropertyResourceValue(DCAT.accessURL);
    final Resource downloadURL = distribution.getPropertyResourceValue(DCAT.downloadURL);

    assertNotNull(accessURL);
    assertNotNull(downloadURL);
    assertEquals("https://data.sh/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf", accessURL.getURI());
    assertEquals("https://opendata.sh/file.csv", downloadURL.getURI());
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment