Skip to content
Snippets Groups Projects
Commit 263123ad authored by Jesper Zedlitz
Browse files

Auftrennen von vermischten deutschen und englischen Titeln

parent 0148c9f3
No related branches found
No related tags found
1 merge request: !22 Resolve "gemischte deutsch/englische Titel"
Pipeline #679 passed
...@@ -39,6 +39,8 @@ import java.util.stream.Collectors; ...@@ -39,6 +39,8 @@ import java.util.stream.Collectors;
public class MDMetadata2Dataset { public class MDMetadata2Dataset {
private static final String LANGUAGE_DE = "de"; private static final String LANGUAGE_DE = "de";
private static final String LANGUAGE_EN = "en";
private static final Logger log = LoggerFactory.getLogger(MDMetadata2Dataset.class); private static final Logger log = LoggerFactory.getLogger(MDMetadata2Dataset.class);
private static final String XPATH_OPERATION_GET_CAPABILITIES = "gmd:identificationInfo/*/srv:containsOperations/srv:SV_OperationMetadata[srv:operationName/gco:CharacterString/text()='GetCapabilities']"; private static final String XPATH_OPERATION_GET_CAPABILITIES = "gmd:identificationInfo/*/srv:containsOperations/srv:SV_OperationMetadata[srv:operationName/gco:CharacterString/text()='GetCapabilities']";
static IRIFactory factory = IRIFactory.iriImplementation(); static IRIFactory factory = IRIFactory.iriImplementation();
...@@ -663,8 +665,17 @@ public class MDMetadata2Dataset { ...@@ -663,8 +665,17 @@ public class MDMetadata2Dataset {
final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString")); final String title = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString"));
if (title != null) { if (title != null) {
if( title.contains("#locale-eng:")) {
// In some source systems, German and English titles are strangely combined in one text.
String germanTitle = StringUtils.substringBefore(title, "#locale-eng:");
String englishTitle = StringUtils.substringAfter(title, "#locale-eng:");
dataset.addLiteral(DCTerms.title, model.createLiteral(germanTitle.trim(), LANGUAGE_DE));
dataset.addLiteral(DCTerms.title, model.createLiteral(englishTitle.trim(), LANGUAGE_EN));
} else {
dataset.addLiteral(DCTerms.title, model.createLiteral(title.trim(), LANGUAGE_DE)); dataset.addLiteral(DCTerms.title, model.createLiteral(title.trim(), LANGUAGE_DE));
} }
}
final String description = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:abstract/gco:CharacterString")); final String description = getTextOrNull(metadata.selectSingleNode("gmd:identificationInfo/*/gmd:abstract/gco:CharacterString"));
if (description != null) { if (description != null) {
......
...@@ -1358,4 +1358,19 @@ public class MDMetadata2DatasetTests { ...@@ -1358,4 +1358,19 @@ public class MDMetadata2DatasetTests {
assertEquals("http://data.europa.eu/bna/c_ac64a52d", assertEquals("http://data.europa.eu/bna/c_ac64a52d",
result.getPropertyResourceValue(DCATAP.hvdCategory).getURI()); result.getPropertyResourceValue(DCATAP.hvdCategory).getURI());
} }
/**
 * Some source systems deliver the German and the English title combined in a single text.
 * This test converts such a record and expects the result to carry one title tagged
 * {@code de} and one title tagged {@code en}.
 */
@Test
public void convert_mixedGermanEnglishTitles() throws DocumentException, IOException {
    final String fixture = "/00729c12-e689-446d-83bd-dd8cfd0eeeb8.xml";
    final Document metadata = saxReader.read(getClass().getResourceAsStream(fixture));

    final Resource dataset = service.convert(metadata);

    // German part of the combined title
    final boolean hasGermanTitle = dataset.hasProperty(DCTerms.title,
            ResourceFactory.createLangLiteral("Robben im Schleswig-Holsteinischen Wattenmeer 1995", "de"));
    assertTrue(hasGermanTitle);

    // English part of the combined title
    final boolean hasEnglishTitle = dataset.hasProperty(DCTerms.title,
            ResourceFactory.createLangLiteral("Seals in the Schleswig-Holstein Wadden Sea in 1995", "en"));
    assertTrue(hasEnglishTitle);
}
} }
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment