From 46abcd3f486ec772ab0c4c3476aa331668debb1e Mon Sep 17 00:00:00 2001
From: Jesper Zedlitz <jesper@zedlitz.de>
Date: Thu, 15 Sep 2022 12:06:45 +0200
Subject: [PATCH] =?UTF-8?q?Toleriere=20ung=C3=BCltige=20URIs=20im=20Eingab?=
 =?UTF-8?q?edokument?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CKAN generiert auch ungültige URIs in der catalog.xml. Trotzdem darf der dcat-catalog-proxy nicht mit einem Fehler aussteigen.

closes #6
---
 .../opendata/catalogproxy/CatalogFilter.java  |  2 +-
 .../catalogproxy/CatalogFilterTest.java       | 10 ++
 src/test/resources/invalid_uri.xml            | 96 +++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 src/test/resources/invalid_uri.xml

diff --git a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
index 01526c5..f4e80e4 100644
--- a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
+++ b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java
@@ -52,7 +52,7 @@ public class CatalogFilter implements InitializingBean {
         RDFParser.create()
                 .source(new FilterInvalidRDF(inputStream))
                 .lang(RDFLanguages.RDFXML)
-                .errorHandler(ErrorHandlerFactory.errorHandlerStrict)
+                .errorHandler(ErrorHandlerFactory.errorHandlerSimple())
                 .base(baseURL)
                 .parse(model);
 
diff --git a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
index 066c032..8d36f4a 100644
--- a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
+++ b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java
@@ -230,4 +230,14 @@ public class CatalogFilterTest {
         inputStream.close();
     }
 
+    /**
+     * CKAN emits catalog.xml files with invalid IRIs and unescaped ampersands; the proxy must cope with them.
+     */
+    @Test
+    public void work_invalid_uri() throws Exception {
+        try (InputStream inputStream = getClass().getResourceAsStream("/invalid_uri.xml")) {
+            catalogFilter.work(inputStream);
+        }
+    }
+
 }
diff --git a/src/test/resources/invalid_uri.xml b/src/test/resources/invalid_uri.xml
new file mode 100644
index 0000000..565ccfc
--- /dev/null
+++ b/src/test/resources/invalid_uri.xml
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rdf:RDF
+  xmlns:foaf="http://xmlns.com/foaf/0.1/"
+  xmlns:spdx="http://spdx.org/rdf/terms#"
+  xmlns:locn="http://www.w3.org/ns/locn#"
+  xmlns:adms="http://www.w3.org/ns/adms#"
+  xmlns:hydra="http://www.w3.org/ns/hydra/core#"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:dcat="http://www.w3.org/ns/dcat#"
+  xmlns:dct="http://purl.org/dc/terms/"
+  xmlns:dcatde="http://dcat-ap.de/def/dcatde/"
+  xmlns:skos="http://www.w3.org/2004/02/skos/core#"
+  xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
+  xmlns:schema="http://schema.org/"
+>
+  
+ <dcat:Distribution rdf:about="http://elm_bp_145_o_b-a.pdf">
+    <dcatde:licenseAttributionByText>Stadt Elmshorn</dcatde:licenseAttributionByText>
+    <dcat:mediaType>application/pdf</dcat:mediaType>
+    <dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/>
+    <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-05-25T12:59:29.036801</dct:modified>
+    <dct:title>Elm_BP_067_02Ae_Ue_Planzeichnung</dct:title>
+    <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
+    <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">922229.0</dcat:byteSize>
+    <dcat:accessURL rdf:resource="http://elm_bp_145_o_b-a.pdf"/>
+    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-05-25T12:59:29.123965</dct:issued>
+    <spdx:checksum>
+      <spdx:Checksum rdf:nodeID="N817a07a8391741258369ba536b3872e2">
+        <spdx:checksumValue rdf:datatype="http://www.w3.org/2001/XMLSchema#hexBinary">47d85146d000eb5735f867cb17671585</spdx:checksumValue>
+        <spdx:algorithm rdf:resource="http://dcat-ap.de/def/hashAlgorithms/md/5"/>
+      </spdx:Checksum>
+    </spdx:checksum>
+  </dcat:Distribution>  
+   <dcat:Distribution rdf:about="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a/resource/49557a31-cafe-4197-9544-5d69bb55639f">
+    <spdx:checksum>
+      <spdx:Checksum rdf:nodeID="Na5813995d42846e8aa29f6652856bd3c">
+        <spdx:checksumValue rdf:datatype="http://www.w3.org/2001/XMLSchema#hexBinary">8f122c9e53d0db07f9bb4777f88cdc20</spdx:checksumValue>
+        <spdx:algorithm rdf:resource="http://dcat-ap.de/def/hashAlgorithms/md/5"/>
+      </spdx:Checksum>
+    </spdx:checksum>
+    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-05-25T12:58:17.361417</dct:issued>
+    <dcat:mediaType>application/pdf</dcat:mediaType>
+    <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-05-25T12:58:17.275684</dct:modified>
+    <dcat:accessURL rdf:resource="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a/resource/49557a31-cafe-4197-9544-5d69bb55639f/download/elm_bp_067_02ae_o_s.pdf"/>
+    <dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/>
+    <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">23123.0</dcat:byteSize>
+    <dcatde:licenseAttributionByText>Stadt Elmshorn</dcatde:licenseAttributionByText>
+    <dct:title>Elm_BP_067_02Ae_Satzungstext</dct:title>
+    <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
+  </dcat:Distribution>
+  <dcat:Catalog rdf:about="https://opendata.schleswig-holstein.de">
+    <dcat:dataset>
+      <dcat:Dataset rdf:about="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a">
+        <dcatde:contributorID rdf:resource="http://dcat-ap.de/def/contributors/schleswigHolstein"/>
+        <dcat:distribution rdf:resource="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a/resource/35877a59-7426-4570-be51-58ef6853da18"/>
+        <dcat:distribution rdf:resource="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a/resource/49557a31-cafe-4197-9544-5d69bb55639f"/>
+        <dcat:keyword>Bauleitplan</dcat:keyword>
+        <dcat:keyword>Bauleitpläne</dcat:keyword>
+        <dcat:keyword>Bauleitplanung</dcat:keyword>
+        <dcat:keyword>BP</dcat:keyword>
+        <dcat:keyword>GDIMRH</dcat:keyword>
+        <dcat:keyword>opendata</dcat:keyword>
+        <dcat:keyword>Raumordnung</dcat:keyword>
+        <dcat:keyword>Städtebau</dcat:keyword>
+        <dcat:keyword>Stadt Elmshorn</dcat:keyword>
+        <dcat:theme rdf:resource="http://publications.europa.eu/resource/authority/data-theme/GOVE"/>
+        <dct:identifier>0a97ffdb-6d67-43c4-a35e-e600e74e2b1a</dct:identifier>
+        <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/DEU"/>
+        <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
+        <dct:references rdf:resource="https://musterdatenkatalog.de/def/musterdatensatz/raumordnung/bebauungsplaene"/>
+        <dct:spatial rdf:resource="http://dcat-ap.de/def/politicalGeocoding/regionalKey/010560015015"/>
+        <dct:temporal>
+          <dct:PeriodOfTime rdf:nodeID="N299ae0ecda0747d2b3be97b6856aed49">
+            <schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">1977-12-14T00:00:00</schema:startDate>
+          </dct:PeriodOfTime>
+        </dct:temporal>
+        <dcat:keyword>Bebauungsplan</dcat:keyword>
+        <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-05-12T00:00:00</dct:issued>
+        <dct:publisher rdf:resource="https://opendata.schleswig-holstein.de/organization/e96642f4-a0b0-4717-b139-1ad556414b64"/>
+        <dcat:keyword>B-Plan</dcat:keyword>
+        <dcat:keyword>Elmshorn</dcat:keyword>
+        <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2022-08-12T10:40:38.901366</dct:modified>
+        <dcat:keyword>Bebauungspläne</dcat:keyword>
+        <dcat:keyword>inspireidentifiziert</dcat:keyword>
+        <dct:description>Realschule Ramskamp Rechtswirksam seit: 14.12.1977 Koordinatensystem: EPSG Code 25832</dct:description>
+        <dcatde:licenseAttributionByText>Stadt Elmshorn</dcatde:licenseAttributionByText>
+        <dcat:distribution rdf:resource="https://opendata.schleswig-holstein.de/dataset/0a97ffdb-6d67-43c4-a35e-e600e74e2b1a/resource/7a1f2851-9e07-4d95-9ac5-411e32ae7e70"/>
+        <dcat:theme rdf:resource="http://publications.europa.eu/resource/authority/data-theme/REGI"/>
+        <dct:title>B-Plan - B 67 (2)</dct:title>
+        <dcat:keyword>BPlan</dcat:keyword>
+        <dcat:keyword>Boden</dcat:keyword>
+        <dcat:keyword>Bodennutzung</dcat:keyword>
+      </dcat:Dataset>
+    </dcat:dataset>
+  </dcat:Catalog>
+  </rdf:RDF>
-- 
GitLab