Skip to content
Snippets Groups Projects
Commit c93467d0 authored by Jesper Zedlitz's avatar Jesper Zedlitz
Browse files

Merge branch '7-dcat-mediatype-korrigieren' into 'master'

Resolve "dcat:mediaType korrigieren"

Closes #7

See merge request !4
parents 91b944ae a4e4ab3f
Branches
No related tags found
1 merge request!4Resolve "dcat:mediaType korrigieren"
Pipeline #402 passed
......@@ -10,7 +10,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
</parent>
<groupId>de.landsh.opendata</groupId>
<artifactId>dcat-catalog-proxy</artifactId>
<version>1.2</version>
<version>1.3.0</version>
<name>dcat-catalog-proxy</name>
<description>DCAT catalog proxy</description>
<properties>
......
......@@ -78,6 +78,7 @@ public class CatalogFilter implements InitializingBean {
addDownloadURLs(model);
addAccessRights(model);
addRights(model);
fixMediaType(model);
return model;
}
......@@ -166,6 +167,24 @@ public class CatalogFilter implements InitializingBean {
}
}
/**
 * Ensures that the dcat:mediaType of a Distribution is a resource.
 * <p>
 * Sometimes CKAN returns a literal (e.g. {@code text/csv}) instead. When the dcat:mediaType
 * statement found for a distribution has a literal object, all dcat:mediaType statements of that
 * distribution are removed and replaced by a single resource whose URI is
 * {@code https://www.iana.org/assignments/media-types/} followed by the lower-cased literal.
 * Distributions without a dcat:mediaType, or with a non-literal one, are left untouched.
 *
 * @param model the model to fix; it is modified in place
 */
void fixMediaType(Model model) {
    // Work on a snapshot of the distributions: statements are removed and added inside the loop,
    // and changing a model while one of its iterators is still open is not safe.
    for (final Resource distribution : model.listSubjectsWithProperty(RDF.type, DCAT.Distribution).toList()) {
        final Statement mediaTypeStatement = distribution.getProperty(DCAT.mediaType);
        if (mediaTypeStatement == null || !mediaTypeStatement.getObject().isLiteral()) {
            continue;
        }
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. a Turkish locale would turn 'I' into a dotless 'ı').
        final String mimeType = mediaTypeStatement.getLiteral().getString().toLowerCase(java.util.Locale.ROOT);
        distribution.removeAll(DCAT.mediaType);
        distribution.addProperty(DCAT.mediaType, model.createResource("https://www.iana.org/assignments/media-types/" + mimeType));
    }
}
void rewriteHydraURLs(Model model) {
final ResIterator it = model.listSubjectsWithProperty(RDF.type, ResourceFactory.createResource("http://www.w3.org/ns/hydra/core#PagedCollection"));
if (it.hasNext()) {
......
......@@ -57,9 +57,9 @@ public class CatalogFilterTest {
@Test
public void work() throws Exception {
final InputStream inputStream = getClass().getResourceAsStream("/catalog.xml");
catalogFilter.work(inputStream);
inputStream.close();
try (final InputStream inputStream = getClass().getResourceAsStream("/catalog.xml")) {
catalogFilter.work(inputStream);
}
}
@Test
......@@ -145,13 +145,13 @@ public class CatalogFilterTest {
*/
@Test
public void work_will_preseve_collections() throws Exception {
final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml");
final Model model = catalogFilter.work(inputStream);
try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) {
final Model model = catalogFilter.work(inputStream);
Assertions.assertEquals(8, countInstances(model, DCAT.Dataset));
Assertions.assertEquals(7, countInstances(model, DCAT.Distribution));
Assertions.assertEquals(8, countInstances(model, DCAT.Dataset));
Assertions.assertEquals(7, countInstances(model, DCAT.Distribution));
inputStream.close();
}
}
/**
......@@ -160,23 +160,22 @@ public class CatalogFilterTest {
*/
@Test
public void work_will_add_accessRights() throws Exception {
final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml");
final Model model = catalogFilter.work(inputStream);
// Every dataset has a dct:accessRights statement
final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Dataset);
int count = 0;
while (it.hasNext()) {
final Resource distribution = it.next();
count++;
final Resource accessRights = distribution.getPropertyResourceValue(DCTerms.accessRights);
assertNotNull(accessRights);
assertEquals("http://publications.europa.eu/resource/authority/access-right/PUBLIC", accessRights.getURI());
try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) {
final Model model = catalogFilter.work(inputStream);
// Every dataset has a dct:accessRights statement
final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Dataset);
int count = 0;
while (it.hasNext()) {
final Resource distribution = it.next();
count++;
final Resource accessRights = distribution.getPropertyResourceValue(DCTerms.accessRights);
assertNotNull(accessRights);
assertEquals("http://publications.europa.eu/resource/authority/access-right/PUBLIC", accessRights.getURI());
}
assertEquals(8, count);
}
assertEquals(8, count);
inputStream.close();
}
/**
......@@ -184,24 +183,23 @@ public class CatalogFilterTest {
*/
@Test
public void work_will_add_rights() throws Exception {
final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml");
final Model model = catalogFilter.work(inputStream);
// Every dataset has a dct:accessRights statement
final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
int count = 0;
while (it.hasNext()) {
final Resource distribution = it.next();
count++;
final Resource rights = distribution.getPropertyResourceValue(DCTerms.rights);
final Resource license = distribution.getPropertyResourceValue(DCTerms.license);
assertNotNull(rights);
assertEquals(license, rights);
try (final InputStream inputStream = getClass().getResourceAsStream("/with_collection.xml")) {
final Model model = catalogFilter.work(inputStream);
// Every distribution has a dct:rights statement that equals its dct:license
final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
int count = 0;
while (it.hasNext()) {
final Resource distribution = it.next();
count++;
final Resource rights = distribution.getPropertyResourceValue(DCTerms.rights);
final Resource license = distribution.getPropertyResourceValue(DCTerms.license);
assertNotNull(rights);
assertEquals(license, rights);
}
assertEquals(7, count);
}
assertEquals(7, count);
inputStream.close();
}
/**
......@@ -211,7 +209,7 @@ public class CatalogFilterTest {
@Test
public void work_invalid_iri() {
final InputStream inputStream = getClass().getResourceAsStream("/invalid_iri.xml");
final Model model = catalogFilter.work(inputStream);
catalogFilter.work(inputStream);
}
/**
......@@ -221,13 +219,12 @@ public class CatalogFilterTest {
public void work_will_remove_non_government_organization() throws IOException {
catalogFilter.unwantedPublishers = Collections.singletonList("https://opendata.schleswig-holstein.de/organization/ee4df032-ec5f-4726-b7ad-a2c708fb53ec");
final InputStream inputStream = getClass().getResourceAsStream("/two-organizations.xml");
final Model model = catalogFilter.work(inputStream);
Assertions.assertEquals(1, countInstances(model, DCAT.Dataset));
Assertions.assertEquals(1, countInstances(model, DCAT.Distribution));
try (final InputStream inputStream = getClass().getResourceAsStream("/two-organizations.xml")) {
final Model model = catalogFilter.work(inputStream);
inputStream.close();
Assertions.assertEquals(1, countInstances(model, DCAT.Dataset));
Assertions.assertEquals(1, countInstances(model, DCAT.Distribution));
}
}
/**
......@@ -235,9 +232,34 @@ public class CatalogFilterTest {
* catalog proxy must be able to cope with this.
*/
@Test
public void work_invalid_uri() {
final InputStream inputStream = getClass().getResourceAsStream("/invalid_uri.xml");
catalogFilter.work(inputStream);
public void work_invalid_uri() throws IOException {
try (InputStream inputStream = getClass().getResourceAsStream("/invalid_uri.xml")) {
catalogFilter.work(inputStream);
}
}
/**
 * A literal dcat:mediaType (fixture /with_downloadURL.xml) must be replaced by the
 * corresponding IANA media type resource.
 */
@Test
public void fixMediaType() throws IOException {
    // try-with-resources closes the fixture stream, as the other tests of this class do
    try (InputStream inputStream = getClass().getResourceAsStream("/with_downloadURL.xml")) {
        final Model model = parseRdf(inputStream);
        catalogFilter.fixMediaType(model);

        final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
        // Fail with a clear message instead of a NoSuchElementException from it.next()
        Assertions.assertTrue(it.hasNext(), "fixture must contain a dcat:Distribution");
        final Resource distribution = it.next();
        final Resource mediaType = distribution.getPropertyResourceValue(DCAT.mediaType);
        assertNotNull(mediaType);
        assertEquals("https://www.iana.org/assignments/media-types/text/csv", mediaType.getURI());
    }
}
/**
 * A dcat:mediaType that already is a resource must be preserved unchanged.
 * Relies on the fixture /mediaType.xml declaring dcat:mediaType as the resource
 * https://www.iana.org/assignments/media-types/text/csv.
 */
@Test
public void fixMediaType_alreadyCorrect() throws IOException {
    // try-with-resources closes the fixture stream, as the other tests of this class do
    try (InputStream inputStream = getClass().getResourceAsStream("/mediaType.xml")) {
        final Model model = parseRdf(inputStream);
        catalogFilter.fixMediaType(model);

        final ResIterator it = model.listSubjectsWithProperty(RDF.type, DCAT.Distribution);
        // Fail with a clear message instead of a NoSuchElementException from it.next()
        Assertions.assertTrue(it.hasNext(), "fixture must contain a dcat:Distribution");
        final Resource distribution = it.next();
        final Resource mediaType = distribution.getPropertyResourceValue(DCAT.mediaType);
        assertNotNull(mediaType);
        assertEquals("https://www.iana.org/assignments/media-types/text/csv", mediaType.getURI());
    }
}
}
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dct="http://purl.org/dc/terms/"
xmlns:schema="http://schema.org/"
xmlns:vcard="http://www.w3.org/2006/vcard/ns#"
xmlns:dcatde="http://dcat-ap.de/def/dcatde/1.0.1/"
>
<dcat:Catalog rdf:about="http://opendata.schleswig-holstein.de">
<dcat:dataset>
<dcat:Dataset rdf:about="http://opendata.schleswig-holstein.de/dataset/StaNord_CMS:50330">
<dcat:keyword>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein</dcat:keyword>
<dcatde:contributorID rdf:resource="http://dcat-ap.de/def/contributors/schleswigHolstein"/>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2019-06-01T13:46:55.291496</dct:issued>
<dct:publisher rdf:resource="http://opendata.schleswig-holstein.de/organization/statistikamt-nord"/>
<dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2019-06-01T13:46:55.338566</dct:modified>
<dct:description>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:description>
<dct:title>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:title>
<dct:creator>
<foaf:Organization rdf:nodeID="Nc233acd517104832885bdfe3c81f3a01">
<foaf:name>Statistisches Amt für Hamburg und Schleswig-Holstein</foaf:name>
<foaf:mbox>info@statistik-nord.de</foaf:mbox>
</foaf:Organization>
</dct:creator>
<dct:spatial rdf:resource="http://dcat-ap.de/def/politicalGeocoding/stateKey/01"/>
<dcat:keyword>Ernte: Feldfrüchte und Grünland</dcat:keyword>
<dct:temporal>
<dct:PeriodOfTime rdf:nodeID="N5cc5efcb2099487a95f039c45a53e3d8">
<schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2004-09-01T00:00:00</schema:startDate>
<schema:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2004-09-30T00:00:00</schema:endDate>
</dct:PeriodOfTime>
</dct:temporal>
<dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText>
<dcat:theme rdf:resource="http://publications.europa.eu/resource/authority/data-theme/AGRI"/>
<dcat:contactPoint>
<vcard:Organization rdf:nodeID="N5c233ba34b6f4a2dbee00193aafa12f0">
<vcard:fn>Statistisches Amt für Hamburg und Schleswig-Holstein</vcard:fn>
<vcard:hasEmail rdf:resource="mailto:info@statistik-nord.de"/>
</vcard:Organization>
</dcat:contactPoint>
<dct:identifier>StaNord_CMS:50330</dct:identifier>
<dcat:landingPage rdf:resource="http://www.statistik-nord.de"/>
<dcat:distribution>
<dcat:Distribution rdf:about="http://opendata.schleswig-holstein.de/dataset/StaNord_CMS:50330/resource/7b80638b-b65f-44fe-ab7a-95855db890b6">
<dct:title>Ernteberichterstattung über Feldfrüchte und Grünland in Schleswig-Holstein - Endgültige Erträge und vorläufige Erntemengen von Getreide und Ölfrüchten, vorläufige Kartoffelernte 2004</dct:title>
<dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText>
<dcat:accessURL rdf:resource="https://www.statistik-nord.de/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf"/>
<dcat:downloadURL rdf:resource="http://example.org/file.csv"/>
<dcat:mediaType rdf:resource="https://www.iana.org/assignments/media-types/text/csv"/>
<dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">112874985.0</dcat:byteSize>
<dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
<dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</dcat:dataset>
</dcat:Catalog>
</rdf:RDF>
......@@ -49,6 +49,7 @@
<dcatde:licenseAttributionByText>Statistisches Amt für Hamburg und Schleswig-Holstein - Anstalt des öffentlichen Rechts - (Statistikamt Nord)</dcatde:licenseAttributionByText>
<dcat:accessURL rdf:resource="https://www.statistik-nord.de/fileadmin/Dokumente/Statistische_Berichte/landwirtschaft/C_II_1_m_S/C_II_1_m0409_S.pdf"/>
<dcat:downloadURL rdf:resource="http://example.org/file.csv"/>
<dcat:mediaType>text/csv</dcat:mediaType>
<dcat:byteSize rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">112874985.0</dcat:byteSize>
<dct:license rdf:resource="http://dcat-ap.de/def/licenses/dl-by-de/2.0"/>
<dct:format rdf:resource="http://publications.europa.eu/resource/authority/file-type/PDF"/>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment