diff --git a/pom.xml b/pom.xml index 35617a02f17cb67a241b5029d4ed8108d657ccd2..c88f9507e01822af46b02923f7ede586a5da265a 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x </parent> <groupId>de.landsh.opendata</groupId> <artifactId>dcat-catalog-proxy</artifactId> - <version>1.2</version> + <version>1.3.0</version> <name>dcat-catalog-proxy</name> <description>DCAT catalog proxy</description> <properties> diff --git a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java index f4e80e4db584e96fe737f2184f9945a2e1a5d84d..8cc4c02808f0c5c2a6e880e77032c2b31fb0a5c1 100644 --- a/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java +++ b/src/main/java/de/landsh/opendata/catalogproxy/CatalogFilter.java @@ -78,6 +78,7 @@ public class CatalogFilter implements InitializingBean { addDownloadURLs(model); addAccessRights(model); addRights(model); + fixMediaType(model); return model; } @@ -166,6 +167,25 @@ public class CatalogFilter implements InitializingBean { } } + /** + * The dcat:mediaType of a Distribution must be a resource. Sometimes CKAN returns a literal. + * This method replaces such a literal with the matching IANA media type IRI. + * + * @param model the model whose distributions are fixed in place 
+ */ + void fixMediaType(Model model) { + // Snapshot the distributions first: the loop body modifies the model, which must not happen while a Jena iterator is still open. + for (final Resource distribution : model.listSubjectsWithProperty(RDF.type, DCAT.Distribution).toList()) { + final Statement mediaTypeStatement = distribution.getProperty(DCAT.mediaType); + if (mediaTypeStatement != null && mediaTypeStatement.getObject().isLiteral()) { + // Locale.ROOT keeps the lower-casing independent of the JVM default locale (e.g. the Turkish dotless i). + final String mimeType = mediaTypeStatement.getLiteral().getString().toLowerCase(java.util.Locale.ROOT); + distribution.removeAll(DCAT.mediaType); + distribution.addProperty(DCAT.mediaType, model.createResource("https://www.iana.org/assignments/media-types/" + mimeType)); + } + } + } + void rewriteHydraURLs(Model model) { final ResIterator it = model.listSubjectsWithProperty(RDF.type, ResourceFactory.createResource("http://www.w3.org/ns/hydra/core#PagedCollection")); if (it.hasNext()) { diff --git a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java index 8d36f4a8d5e0dc81048f49a6a077ea02d29e748f..5215d39af5778903005b35909db9aaa6cf2123c1 100644 --- a/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java +++ b/src/test/java/de/landsh/opendata/catalogproxy/CatalogFilterTest.java @@ -57,9 +57,9 @@ public class CatalogFilterTest { @Test public void work() throws Exception { - final InputStream inputStream = getClass().getResourceAsStream("/catalog.xml"); - catalogFilter.work(inputStream); - inputStream.close(); + try (final InputStream inputStream = getClass().getResourceAsStream("/catalog.xml")) { + catalogFilter.work(inputStream); + } } @Test @@ -145,13 +145,13 @@ public class CatalogFilterTest { */ @Test public void work_will_preseve_collections() throws Exception { - final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml"); - final Model model = catalogFilter.work(inputStream); + try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) { + final Model model = 
catalogFilter.work(inputStream); - Assertions.assertEquals(8, countInstances(model, DCAT.Dataset)); - Assertions.assertEquals(7, countInstances(model, DCAT.Distribution)); + Assertions.assertEquals(8, countInstances(model, DCAT.Dataset)); + Assertions.assertEquals(7, countInstances(model, DCAT.Distribution)); - inputStream.close(); + } } /** @@ -160,23 +160,22 @@ public class CatalogFilterTest { */ @Test public void work_will_add_accessRights() throws Exception { - final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml"); - final Model model = catalogFilter.work(inputStream); - - // Every dataset has a dct:accessRights statement - final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Dataset); - int count = 0; - while (it.hasNext()) { - final Resource distribution = it.next(); - count++; - final Resource accessRights = distribution.getPropertyResourceValue(DCTerms.accessRights); - assertNotNull(accessRights); - assertEquals("http://publications.europa.eu/resource/authority/access-right/PUBLIC", accessRights.getURI()); + try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) { + final Model model = catalogFilter.work(inputStream); + + // Every dataset has a dct:accessRights statement pointing to the PUBLIC access right + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Dataset); + int count = 0; + while (it.hasNext()) { + final Resource distribution = it.next(); + count++; + final Resource accessRights = distribution.getPropertyResourceValue(DCTerms.accessRights); + assertNotNull(accessRights); + assertEquals("http://publications.europa.eu/resource/authority/access-right/PUBLIC", accessRights.getURI()); + } + + assertEquals(8, count); } - - assertEquals(8, count); - - inputStream.close(); } /** @@ -184,24 +183,23 @@ public class CatalogFilterTest { */ @Test public void work_will_add_rights() throws Exception { - final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml"); - 
final Model model = catalogFilter.work(inputStream); - - // Every dataset has a dct:accessRights statement - final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); - int count = 0; - while (it.hasNext()) { - final Resource distribution = it.next(); - count++; - final Resource rights = distribution.getPropertyResourceValue(DCTerms.rights); - final Resource license = distribution.getPropertyResourceValue(DCTerms.license); - assertNotNull(rights); - assertEquals(license, rights); + try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) { + final Model model = catalogFilter.work(inputStream); + + // Every distribution has a dct:rights statement that equals its dct:license + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); + int count = 0; + while (it.hasNext()) { + final Resource distribution = it.next(); + count++; + final Resource rights = distribution.getPropertyResourceValue(DCTerms.rights); + final Resource license = distribution.getPropertyResourceValue(DCTerms.license); + assertNotNull(rights); + assertEquals(license, rights); + } + + assertEquals(7, count); } - - assertEquals(7, count); - - inputStream.close(); } /** @@ -211,7 +209,7 @@ public class CatalogFilterTest { @Test public void work_invalid_iri() { final InputStream inputStream = getClass().getResourceAsStream("/invalid_iri.xml"); - final Model model = catalogFilter.work(inputStream); + catalogFilter.work(inputStream); } /** @@ -221,13 +219,12 @@ public class CatalogFilterTest { public void work_will_remove_non_government_organization() throws IOException { catalogFilter.unwantedPublishers = Collections.singletonList("https://opendata.schleswig-holstein.de/organization/ee4df032-ec5f-4726-b7ad-a2c708fb53ec"); - final InputStream inputStream = getClass().getResourceAsStream("/two-organizations.xml"); - final Model model = catalogFilter.work(inputStream); - - Assertions.assertEquals(1, countInstances(model, DCAT.Dataset)); - 
Assertions.assertEquals(1, countInstances(model, DCAT.Distribution)); + try (final InputStream inputStream = getClass().getResourceAsStream("/two-organizations.xml")) { + final Model model = catalogFilter.work(inputStream); - inputStream.close(); + Assertions.assertEquals(1, countInstances(model, DCAT.Dataset)); + Assertions.assertEquals(1, countInstances(model, DCAT.Distribution)); + } } /** @@ -235,9 +232,41 @@ public class CatalogFilterTest { * catalog proxy must be able to cope with this. */ @Test - public void work_invalid_uri() { - final InputStream inputStream = getClass().getResourceAsStream("/invalid_uri.xml"); - catalogFilter.work(inputStream); + public void work_invalid_uri() throws IOException { + try (InputStream inputStream = getClass().getResourceAsStream("/invalid_uri.xml")) { + catalogFilter.work(inputStream); + } + } + + /** + * A literal dcat:mediaType is replaced by the corresponding IANA media type resource. + */ + @Test + public void fixMediaType() { + final Model model = parseRdf(getClass().getResourceAsStream("/with_downloadURL.xml")); + catalogFilter.fixMediaType(model); + + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); + final Resource distribution = it.next(); + + final Resource mediaType = distribution.getPropertyResourceValue(DCAT.mediaType); + assertNotNull(mediaType); + assertEquals("https://www.iana.org/assignments/media-types/text/csv", mediaType.getURI()); } + /** + * A dcat:mediaType that already is a resource must still be that resource afterwards. + */ + @Test + public void fixMediaType_alreadyCorrect() { + final Model model = parseRdf(getClass().getResourceAsStream("/mediaType.xml")); + catalogFilter.fixMediaType(model); + + final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution); + final Resource distribution = it.next(); + + final Resource mediaType = distribution.getPropertyResourceValue(DCAT.mediaType); + assertNotNull(mediaType); + assertEquals("https://www.iana.org/assignments/media-types/text/csv", mediaType.getURI()); + } } diff --git a/src/test/resources/mediaType.xml b/src/test/resources/mediaType.xml new file mode 100644 index 
0000000000000000000000000000000000000000..6f3cd1dc3ffc1b9add73ac2a431aca7624659446 --- /dev/null +++ b/src/test/resources/mediaType.xml @@ -0,0 +1,61 @@ +<?xml version="1.0" encoding="utf-8"?> +<rdf:RDF + xmlns:foaf="http://xmlns.com/foaf/0.1/" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:dcat="http://www.w3.org/ns/dcat#" + xmlns:dct="http://purl.org/dc/terms/" + xmlns:schema="http://schema.org/" + xmlns:vcard="http://www.w3.org/2006/vcard/ns#" + xmlns:dcatde="http://dcat-ap.de/def/dcatde/1.0.1/" +> + <dcat:Catalog rdf:about="http://opendata.schleswig-holstein.de"> + <dcat:dataset> + <dcat:Dataset rdf:about="http://opendata.schleswig-holstein.de/dataset/StaNord_CMS:50330"> + <dcat:keyword>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein</dcat:keyword> + <dcatde:contributorID rdf:resource="http://dcat-ap.de/def/contributors/schleswigHolstein"/> + <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2019-06-01T13:46:55.291496</dct:issued> + <dct:publisher rdf:resource="http://opendata.schleswig-holstein.de/organization/statistikamt-nord"/> + <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/> + <dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2019-06-01T13:46:55.338566</dct:modified> + <dct:description>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:description> + <dct:title>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:title> + <dct:creator> + <foaf:Organization rdf:nodeID="Nc233acd517104832885bdfe3c81f3a01"> + <foaf:name>Statistisches Amt für Hamburg und Schleswig-Holstein</foaf:name> + <foaf:mbox>info@statistik-nord.de</foaf:mbox> + </foaf:Organization> + </dct:creator> + 
<dct:spatial rdf:resource="http://dcat-ap.de/def/politicalGeocoding/stateKey/01"/> + <dcat:keyword>Ernte: Feldfrüchte und Grünland</dcat:keyword> + <dct:temporal> + <dct:PeriodOfTime rdf:nodeID="N5cc5efcb2099487a95f039c45a53e3d8"> + <schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2004-09-01T00:00:00</schema:startDate> + <schema:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2004-09-30T00:00:00</schema:endDate> + </dct:PeriodOfTime> + </dct:temporal> + <dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText> + <dcat:theme rdf:resource="http://publications.europa.eu/resource/authority/data-theme/AGRI"/> + <dcat:contactPoint> + <vcard:Organization rdf:nodeID="N5c233ba34b6f4a2dbee00193aafa12f0"> + <vcard:fn>Statistisches Amt für Hamburg und Schleswig-Holstein</vcard:fn> + <vcard:hasEmail rdf:resource="mailto:info@statistik-nord.de"/> + </vcard:Organization> + </dcat:contactPoint> + <dct:identifier>StaNord_CMS:50330</dct:identifier> + <dcat:landingPage rdf:resource="http://www.statistik-nord.de"/> + <dcat:distribution> + <dcat:Distribution rdf:about="http://opendata.schleswig-holstein.de/dataset/StaNord_CMS:50330/resource/7b80638b-b65f-44fe-ab7a-95855db890b6"> + <dct:title>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:title> + <dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText> + <dcat:accessURL rdf:resource="https://www.statistik-nord.de/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf"/> + <dcat:downloadURL rdf:resource="http://example.org/file.csv"/> + <dcat:mediaType 
rdf:resource="https://www.iana.org/assignments/media-types/text/csv"/> + <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">112874985.0</dcat:byteSize> + <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/> + <dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/> + </dcat:Distribution> + </dcat:distribution> + </dcat:Dataset> + </dcat:dataset> + </dcat:Catalog> + </rdf:RDF> diff --git a/src/test/resources/with_downloadURL.xml b/src/test/resources/with_downloadURL.xml index 655952beb3f55468829d44a0da344b24190df090..6851913681fbaf35b5117db1860cc1ad67cc92af 100644 --- a/src/test/resources/with_downloadURL.xml +++ b/src/test/resources/with_downloadURL.xml @@ -49,6 +49,7 @@ <dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText> <dcat:accessURL rdf:resource="https://www.statistik-nord.de/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf"/> <dcat:downloadURL rdf:resource="http://example.org/file.csv"/> + <dcat:mediaType>text/csv</dcat:mediaType> <dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">112874985.0</dcat:byteSize> <dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/> <dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/>