Skip to content
Snippets Groups Projects
Commit 0a1e5fdc authored by OZG-Cloud Team
Browse files

OZG-7077 unescape html characters

parent 89ac4b2b
Branches
Tags
No related merge requests found
...@@ -31,6 +31,7 @@ import org.jsoup.Jsoup; ...@@ -31,6 +31,7 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist; import org.jsoup.safety.Safelist;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.web.util.HtmlUtils;
import de.ozgcloud.eingang.common.formdata.FormData; import de.ozgcloud.eingang.common.formdata.FormData;
...@@ -58,12 +59,13 @@ public class FormDataHtmlCleaner { ...@@ -58,12 +59,13 @@ public class FormDataHtmlCleaner {
} }
Object parseHtml(String html) { Object parseHtml(String html) {
var jsoupDocument = Jsoup.parse(html);
var outputSettings = new Document.OutputSettings(); // keep new lines var outputSettings = new Document.OutputSettings(); // keep new lines
outputSettings.prettyPrint(false); outputSettings.prettyPrint(false);
jsoupDocument.outputSettings(outputSettings); var htmlText = Jsoup.clean(modifyInputHtml(html), "", Safelist.none(), outputSettings);
var innerHtml = jsoupDocument.html().replace("\\\\n", "\n"); return HtmlUtils.htmlUnescape(htmlText);
return Jsoup.clean(innerHtml, "", Safelist.none(), outputSettings);
} }
/**
 * Prepares raw input text before it is handed to the HTML cleaner.
 *
 * <p>Every occurrence of the three-character sequence backslash, backslash, {@code n}
 * is replaced by a single line-feed character. All other content is returned unchanged.
 *
 * @param html the raw input text; must not be {@code null}
 * @return the input with each such sequence replaced by a line feed
 */
String modifyInputHtml(String html) {
	final String escapedLineBreak = "\\\\n";
	final String lineBreak = "\n";
	return html.replace(escapedLineBreak, lineBreak);
}
} }
...@@ -30,8 +30,11 @@ import java.util.List; ...@@ -30,8 +30,11 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import org.assertj.core.data.MapEntry; import org.assertj.core.data.MapEntry;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.mockito.InjectMocks; import org.mockito.InjectMocks;
import org.mockito.Spy; import org.mockito.Spy;
...@@ -178,44 +181,39 @@ class FormDataHtmlCleanerTest { ...@@ -178,44 +181,39 @@ class FormDataHtmlCleanerTest {
static final String KEY_VALUE = "value"; static final String KEY_VALUE = "value";
static final Map<String, Object> FORM_DATA_MAP = Map.of("tf1", Map.of( static final Map<String, Object> FORM_DATA_MAP = Map.of("tf1", Map.of(
KEY_LABEL, "<p><em>&Auml;</em></p>", KEY_LABEL, "<p><em>Label</em></p>",
KEY_VALUE, "Ä - Wert"), KEY_VALUE, "<i>Value</i>"),
"tf2", Map.of(
KEY_LABEL, "<p><strong>&Ouml;</strong></p>",
KEY_VALUE, "Ö - Wert"),
"fs1", Map.of( "fs1", Map.of(
KEY_LABEL, "Ü", KEY_LABEL, "Key",
KEY_VALUE, Map.of( KEY_VALUE, Map.of(
"tf3", Map.of( "tf3", Map.of(
KEY_LABEL, " <p><s>Label mit</s> &szlig;</p>", KEY_LABEL, "<p><s>Label</s></p>",
KEY_VALUE, "ein Text mit ß und <html><body><h1>Hello</h1><body><html>")), KEY_VALUE, "ein Text mit <html><body><h1>Hello</h1><body><html>")),
"tf4", Map.of( "tf4", Map.of(
KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">&auml;</span></p>", KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">Wichtig</span></p>",
KEY_VALUE, "Text"), KEY_VALUE, "Text"),
"ed1", Map.of( "ed1", Map.of(
KEY_LABEL, KEY_LABEL,
"<ol>\n\t<li><em><strong><u>&ouml;</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;\">&uuml;</span></li>\n</ol>", "<ol>\n\t<li><em><strong><u>ganz</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;"
+ "\">bunt</span></li>\n</ol>",
KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen" KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen"
))); )));
static final Map<String, Object> EXPECTED_MAP = Map.of("tf1", Map.of( static final Map<String, Object> EXPECTED_MAP = Map.of("tf1", Map.of(
KEY_LABEL, "Ä", KEY_LABEL, "Label",
KEY_VALUE, "Ä - Wert"), KEY_VALUE, "Value"),
"tf2", Map.of(
KEY_LABEL, "Ö",
KEY_VALUE, "Ö - Wert"),
"fs1", Map.of( "fs1", Map.of(
KEY_LABEL, "Ü", KEY_LABEL, "Key",
KEY_VALUE, Map.of( KEY_VALUE, Map.of(
"tf3", Map.of( "tf3", Map.of(
KEY_LABEL, "Label mit ß", KEY_LABEL, "Label",
KEY_VALUE, "ein Text mit ß und Hello")), KEY_VALUE, "ein Text mit Hello")),
"tf4", Map.of( "tf4", Map.of(
KEY_LABEL, "ä", KEY_LABEL, "Wichtig",
KEY_VALUE, "Text"), KEY_VALUE, "Text"),
"ed1", Map.of( "ed1", Map.of(
KEY_LABEL, KEY_LABEL,
"\n\tö\n\tü\n", "\n\tganz\n\tbunt\n",
KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen" KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen"
))); )));
...@@ -223,7 +221,35 @@ class FormDataHtmlCleanerTest { ...@@ -223,7 +221,35 @@ class FormDataHtmlCleanerTest {
void shouldCleanHtml() { void shouldCleanHtml() {
var result = cleaner.clean(FormData.builder().formData(FORM_DATA_MAP).build()); var result = cleaner.clean(FormData.builder().formData(FORM_DATA_MAP).build());
assertThat(result.getFormData()).isEqualTo(EXPECTED_MAP); assertThat(result.getFormData()).usingRecursiveComparison().isEqualTo(EXPECTED_MAP);
}
/**
 * Checks that HTML entities for German umlauts and sharp s are decoded in both the label
 * and the value of a form field.
 *
 * <p>Each CSV row supplies, in order: encoded label, encoded value, expected label,
 * expected value. The field is placed under the key {@code "tf1"} and run through
 * {@code cleaner.cleanFormData}.
 */
@DisplayName("should decode german characters")
@ParameterizedTest(name = "{0} => {2}")
@CsvSource({
		"&Auml;, &Auml;-Wert, Ä, Ä-Wert",
		"&Ouml;, &Ouml;-Wert, Ö, Ö-Wert",
		"&Uuml;, &Uuml;-Wert, Ü, Ü-Wert",
		"&szlig;, Label mit &szlig;, ß, Label mit ß"
})
void shouldDecodeUmlaut(String srcLabel, String srcValue, String expectedLabel, String expectedValue) {
	// Build the single form field as it arrives (encoded) and as it should leave (decoded).
	Map<String, String> encodedField = Map.of(KEY_LABEL, srcLabel, KEY_VALUE, srcValue);
	Map<String, String> decodedField = Map.of(KEY_LABEL, expectedLabel, KEY_VALUE, expectedValue);

	var cleaned = cleaner.cleanFormData(Map.<String, Object>of("tf1", encodedField));

	assertThat(cleaned).isEqualTo(Map.of("tf1", decodedField));
}
@DisplayName("should decode special characters")
@ParameterizedTest(name = "{0} => {1}")
@CsvSource(value = { "&amp;, &",
"&lt;, <,",
"&gt;, >",
"&quot;, \"",
"&#39;, '" }, quoteCharacter = '`')
void shouldDecodeAmpersand(String src, String expected) {
var result = cleaner.cleanFormData(Map.of(KEY_LABEL, src));
assertThat(result).isEqualTo(Map.of(KEY_LABEL, expected));
} }
} }
} }
\ No newline at end of file
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment