Upgrade to Spring Boot 3.4.0 and Apache Jena 5.2.0, and repair IRIs that Jena rejects.

Notes on this revision of the patch:
  * CatalogFilter.java: the added "import java.nio.channels.Selector;" was dropped. The
    patch removes both uses of Selector from that file, so the import was unused and
    pointed at an unrelated NIO class.
  * FilterInvalidRDF.java: the single-quote branch of fixRdfAbout searched with the
    double-quote marker, so single-quoted rdf:about / rdf:resource values were never
    repaired. It now searches with the single-quote marker.
  * FilterInvalidRDF.java: fixRdfAbout and fixIRI gained Javadoc; the literal space
    replacement uses String.replace instead of the regex-based replaceAll.
  * The "index" lines still carry the blob ids of the original patch; the post-image ids
    of the two Java files edited above no longer match them.
  * Indentation and blank context lines were reconstructed from a whitespace-collapsed
    copy of the patch and may differ from the files in the repository.

diff --git a/pom.xml b/pom.xml
index 7671cc5fa4dd58d591f93671d1f3d29aff880c50..cd8e4c7c0b297ccfe5b41ff67f770bdd62c1aca3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,21 +1,20 @@
 <?xml version="1.0" encoding="utf-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <parent>
     <groupId>org.springframework.boot</groupId>
     <artifactId>spring-boot-starter-parent</artifactId>
-    <version>3.2.2</version>
+    <version>3.4.0</version>
     <relativePath />
   </parent>
   <groupId>de.landsh.opendata</groupId>
   <artifactId>dcat-catalog-proxy</artifactId>
-  <version>1.3.0</version>
+  <version>1.4.0-SNAPSHOT</version>
   <name>dcat-catalog-proxy</name>
   <description>DCAT catalog proxy</description>
   <properties>
     <java.version>17</java.version>
-    <jena.version>4.7.0</jena.version>
+    <jena.version>5.2.0</jena.version>
   </properties>
   <dependencies>
     <dependency>
@@ -80,6 +79,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
         <artifactId>snakeyaml</artifactId>
         <version>1.32</version>
       </dependency>
+      <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
+      <dependency>
+        <groupId>commons-io</groupId>
+        <artifactId>commons-io</artifactId>
+        <version>2.18.0</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
   <build>
@@ -92,7 +97,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
       <plugin>
         <groupId>org.jacoco</groupId>
         <artifactId>jacoco-maven-plugin</artifactId>
-        <version>0.8.7</version>
+        <version>0.8.12</version>
         <executions>
           <execution>
             <id>coverage-initialize</id>
@@ -157,7 +162,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
       <plugin>
         <groupId>org.owasp</groupId>
         <artifactId>dependency-check-maven</artifactId>
-        <version>7.4.4</version>
+        <version>11.1.0</version>
         <configuration>
           <suppressionFiles>
             <suppressionFile>owasp-suppressions.xml</suppressionFile>
@@ -165,6 +170,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
           <failBuildOnCVSS>8</failBuildOnCVSS>
           <assemblyAnalyzerEnabled>false</assemblyAnalyzerEnabled>
           <failOnError>true</failOnError>
+          <nvdApiKey>${env.NVD_API_KEY}</nvdApiKey>
         </configuration>
         <executions>
           <execution>
@@ -177,7 +183,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>versions-maven-plugin</artifactId>
-        <version>2.14.2</version>
+        <version>2.18.0</version>
         <configuration>
           <rulesUri>file:///${project.basedir}/rules.xml</rulesUri>
         </configuration>
@@ -187,14 +193,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
         <artifactId>spotbugs-maven-plugin</artifactId>
         <version>4.5.3.0</version>
       </plugin>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>versions-maven-plugin</artifactId>
-        <version>2.14.2</version>
-        <configuration>
-          <rulesUri>file:///${project.basedir}/rules.xml</rulesUri>
-        </configuration>
-      </plugin>
     </plugins>
   </build>
 </project>
diff --git a/rules.xml b/rules.xml
index f5be1873c4c6edd8ba93a4e86628febd3e61deaf..2b6901820718196b3ab82510f209fbb4721c5a93 100644
--- a/rules.xml
+++ b/rules.xml
@@ -11,15 +11,5 @@
     <ignoreVersion type="regex">(?i).*M(?:-?\d+)?</ignoreVersion>
   </ignoreVersions>
   <rules>
-
-    <!-- 4.7.0 is the last version that work. There is an additional rule for
-         IRIs in later versions that prevents some of CKAN's responses
-         to be parsed. -->
-    <rule groupId="org.apache.jena" comparisonMethod="maven">
-      <ignoreVersions>
-        <ignoreVersion type="regex">4.[89].*</ignoreVersion>
-        <ignoreVersion type="regex">4.1[0-9].*</ignoreVersion>
-      </ignoreVersions>
-    </rule>
   </rules>
 </ruleset>
diff --git a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
index 14b02f209bd98555d42e0cdb53ca32d54c40146f..6bf2bd21eac6a22ec4a91405c8a5b2d81b038962 100644
--- a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
+++ b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
@@ -359,8 +359,7 @@ public class CatalogFilter implements InitializingBean {
     void changeStartAndEndDate(Model model) {
         LinkedList<Statement> oldStatements = new LinkedList<>();
 
-        Selector selectorStart = new SimpleSelector(null, model.createProperty("http://schema.org/startDate"), (Object) null);
-        StmtIterator it = model.listStatements(selectorStart);
+        StmtIterator it = model.listStatements(null, model.createProperty("http://schema.org/startDate"), (String) null);
         while (it.hasNext()) {
             Statement stmt = it.next();
             oldStatements.add(stmt);
@@ -368,8 +367,7 @@ public class CatalogFilter implements InitializingBean {
         }
         it.close();
 
-        Selector selectorEnd = new SimpleSelector(null, model.createProperty("http://schema.org/endDate"), (Object) null);
-        it = model.listStatements(selectorEnd);
+        it = model.listStatements(null, model.createProperty("http://schema.org/endDate"), (String) null);
         while (it.hasNext()) {
             Statement stmt = it.next();
             oldStatements.add(stmt);
diff --git a/src/main/java/de/landsh/opendata/catalogproxy/FilterInvalidRDF.java b/src/main/java/de/landsh/opendata/catalogproxy/FilterInvalidRDF.java
index 0b9029532d3ae152d23c94d12b440992fbfc3779..f7ddae35a6f8c96389f4557095dc25fb2ce5a6f2 100644
--- a/src/main/java/de/landsh/opendata/catalogproxy/FilterInvalidRDF.java
+++ b/src/main/java/de/landsh/opendata/catalogproxy/FilterInvalidRDF.java
@@ -1,6 +1,11 @@
 package de.landsh.opendata.catalogproxy;
 
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.jena.irix.IRIException;
+import org.apache.jena.irix.IRIs;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -10,8 +15,9 @@ import java.nio.charset.StandardCharsets;
 
 public class FilterInvalidRDF extends InputStream {
 
-    private static final String RDF_ABOUT_DOUBLE_QUOTE = "rdf:about=\"";
-    private static final String RDF_ABOUT_SINGLE_QUOTE = "rdf:about='";
+
+    private static final Logger log = LoggerFactory.getLogger(FilterInvalidRDF.class);
+
     private final BufferedReader reader;
     boolean initialized = false;
     private byte[] currentLine = null;
@@ -40,27 +46,65 @@ public class FilterInvalidRDF extends InputStream {
             line = StringUtils.join(fragments, "&");
         }
 
+        line = fixRdfAbout(line, "about");
+        return fixRdfAbout(line, "resource");
+    }
+
+    /**
+     * Repairs the IRI in the first {@code rdf:<propertyName>} attribute found on the line.
+     *
+     * @param line         one line of RDF/XML
+     * @param propertyName attribute name without the {@code rdf:} prefix, e.g. {@code about}
+     * @return the line with the attribute value passed through {@link #fixIRI(String)}, or the
+     *         unchanged line if no such attribute with a closing quote is present
+     */
+    private static String fixRdfAbout(String line, String propertyName) {
+        final String propertyWithDoubleQuote = "rdf:" + propertyName + "=\"";
+        final String propertyWithSingleQuote = "rdf:" + propertyName + "='";
+
         String before = null;
         String iri = null;
         String after = null;
 
-        if (line.contains(RDF_ABOUT_DOUBLE_QUOTE)) {
-            before = StringUtils.substringBefore(line, RDF_ABOUT_DOUBLE_QUOTE) + RDF_ABOUT_DOUBLE_QUOTE;
-            iri = StringUtils.substringBetween(line, RDF_ABOUT_DOUBLE_QUOTE, "\"");
-            after = "\"" + StringUtils.substringAfter(StringUtils.substringAfter(line, RDF_ABOUT_DOUBLE_QUOTE), "\"");
-        } else if (line.contains(RDF_ABOUT_SINGLE_QUOTE)) {
-            before = StringUtils.substringBefore(line, RDF_ABOUT_DOUBLE_QUOTE) + RDF_ABOUT_SINGLE_QUOTE;
-            iri = StringUtils.substringBetween(line, RDF_ABOUT_DOUBLE_QUOTE, "'");
-            after = "'" + StringUtils.substringAfter(StringUtils.substringAfter(line, RDF_ABOUT_DOUBLE_QUOTE), "'");
+        if (line.contains(propertyWithDoubleQuote)) {
+            before = StringUtils.substringBefore(line, propertyWithDoubleQuote) + propertyWithDoubleQuote;
+            iri = StringUtils.substringBetween(line, propertyWithDoubleQuote, "\"");
+            after = "\"" + StringUtils.substringAfter(StringUtils.substringAfter(line, propertyWithDoubleQuote), "\"");
+        } else if (line.contains(propertyWithSingleQuote)) {
+            before = StringUtils.substringBefore(line, propertyWithSingleQuote) + propertyWithSingleQuote;
+            iri = StringUtils.substringBetween(line, propertyWithSingleQuote, "'");
+            after = "'" + StringUtils.substringAfter(StringUtils.substringAfter(line, propertyWithSingleQuote), "'");
         }
         if (iri == null) {
             return line;
         } else {
-            final String fixedIRI = iri.replaceAll(" ", "%20");
-            return before + fixedIRI + after;
+            return before + fixIRI(iri) + after;
         }
     }
 
+    /**
+     * Percent-encodes spaces in an IRI and, if {@link IRIs#reference(String)} still rejects
+     * the result, substitutes a {@code urn:} identifier built from the MD5 hex digest of the
+     * original text.
+     *
+     * @param iri the attribute value as found in the input
+     * @return the repaired IRI
+     */
+    private static String fixIRI(String iri) {
+        String result = iri.replace(" ", "%20");
+
+        try {
+            IRIs.reference(result);
+        } catch (IRIException e) {
+            log.info("Found a really bad IRI: '{}'", iri);
+
+            // A reliable way to turn any text (even a broken URI) into a URI is to use a hash value.
+            result = "urn:" + DigestUtils.md5Hex(iri);
+        }
+
+        return result;
+    }
+
     private void readNextLine() throws IOException {
         final String line = reader.readLine();
         if (line == null) {
@@ -96,7 +140,7 @@ public class FilterInvalidRDF extends InputStream {
             index++;
             return '\n';
         }
-        
+
         byte result = currentLine[index];
         index++;
         return result;
diff --git a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
index eb94bbae5ea410c0a534f55d2004a2c84eb2b73b..b2443367bf768d9e5af604cddaeed81c023b2240 100644
--- a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
+++ b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
@@ -1,6 +1,9 @@
 package de.landsh.opendata.catalogproxy;
 
-import org.apache.jena.rdf.model.*;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
+import org.apache.jena.rdf.model.ResIterator;
+import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.riot.RDFLanguages;
 import org.apache.jena.riot.RDFParser;
 import org.apache.jena.riot.system.ErrorHandlerFactory;
@@ -270,11 +273,8 @@ public class CatalogFilterTest {
         try (final InputStream inputStream = getClass().getResourceAsStream("/catalog.xml")) {
             final Model model = catalogFilter.work(inputStream);
 
-            Selector selectorSchema = new SimpleSelector(null, model.createProperty("http://schema.org/startDate"), (Object) null);
-            assertFalse(model.listStatements(selectorSchema).hasNext());
-
-            Selector selectorDCAT = new SimpleSelector(null, DCAT.startDate, (Object) null);
-            assertTrue(model.listStatements(selectorDCAT).hasNext());
+            assertFalse(model.listStatements(null, model.createProperty("http://schema.org/startDate"), (String) null).hasNext());
+            assertTrue(model.listStatements(null, DCAT.startDate, (String) null).hasNext());
         }
     }
 }