Skip to content
Snippets Groups Projects
Commit 0a1e5fdc authored by OZG-Cloud Team
Browse files

OZG-7077 unescape html characters

parent 89ac4b2b
No related branches found
No related tags found
No related merge requests found
......@@ -31,6 +31,7 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;
import org.springframework.stereotype.Component;
import org.springframework.web.util.HtmlUtils;
import de.ozgcloud.eingang.common.formdata.FormData;
......@@ -58,12 +59,13 @@ public class FormDataHtmlCleaner {
}
/**
 * Strips all HTML markup from the given text and decodes HTML character references.
 *
 * <p>The input is first normalised by {@link #modifyInputHtml(String)}, then cleaned with
 * {@link Safelist#none()}, and finally passed through {@link HtmlUtils#htmlUnescape(String)} so
 * that references such as {@code &Auml;} or {@code &amp;} are returned as plain characters.
 *
 * @param html the raw, possibly HTML-formatted text; must not be {@code null}
 * @return the cleaned and unescaped text
 */
Object parseHtml(String html) {
	var outputSettings = new Document.OutputSettings();
	// Disable pretty printing to keep the new lines of the input.
	outputSettings.prettyPrint(false);

	var htmlText = Jsoup.clean(modifyInputHtml(html), "", Safelist.none(), outputSettings);
	// The cleaned result is still HTML text, so character references have to be decoded.
	return HtmlUtils.htmlUnescape(htmlText);
}
/**
 * Prepares raw input for HTML cleaning by turning escaped line-break markers into line feeds.
 *
 * @param html the raw text; must not be {@code null}
 * @return the text with every backslash-backslash-n sequence replaced by a line feed
 */
String modifyInputHtml(String html) {
	final String escapedLineBreak = "\\\\n";
	final String lineBreak = "\n";
	return html.replace(escapedLineBreak, lineBreak);
}
}
......@@ -30,8 +30,11 @@ import java.util.List;
import java.util.Map;
import org.assertj.core.data.MapEntry;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.mockito.InjectMocks;
import org.mockito.Spy;
......@@ -178,44 +181,39 @@ class FormDataHtmlCleanerTest {
static final String KEY_VALUE = "value";
static final Map<String, Object> FORM_DATA_MAP = Map.of("tf1", Map.of(
KEY_LABEL, "<p><em>&Auml;</em></p>",
KEY_VALUE, "Ä - Wert"),
"tf2", Map.of(
KEY_LABEL, "<p><strong>&Ouml;</strong></p>",
KEY_VALUE, "Ö - Wert"),
KEY_LABEL, "<p><em>Label</em></p>",
KEY_VALUE, "<i>Value</i>"),
"fs1", Map.of(
KEY_LABEL, "Ü",
KEY_LABEL, "Key",
KEY_VALUE, Map.of(
"tf3", Map.of(
KEY_LABEL, " <p><s>Label mit</s> &szlig;</p>",
KEY_VALUE, "ein Text mit ß und <html><body><h1>Hello</h1><body><html>")),
KEY_LABEL, "<p><s>Label</s></p>",
KEY_VALUE, "ein Text mit <html><body><h1>Hello</h1><body><html>")),
"tf4", Map.of(
KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">&auml;</span></p>",
KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">Wichtig</span></p>",
KEY_VALUE, "Text"),
"ed1", Map.of(
KEY_LABEL,
"<ol>\n\t<li><em><strong><u>&ouml;</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;\">&uuml;</span></li>\n</ol>",
"<ol>\n\t<li><em><strong><u>ganz</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;"
+ "\">bunt</span></li>\n</ol>",
KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen"
)));
static final Map<String, Object> EXPECTED_MAP = Map.of("tf1", Map.of(
KEY_LABEL, "Ä",
KEY_VALUE, "Ä - Wert"),
"tf2", Map.of(
KEY_LABEL, "Ö",
KEY_VALUE, "Ö - Wert"),
KEY_LABEL, "Label",
KEY_VALUE, "Value"),
"fs1", Map.of(
KEY_LABEL, "Ü",
KEY_LABEL, "Key",
KEY_VALUE, Map.of(
"tf3", Map.of(
KEY_LABEL, "Label mit ß",
KEY_VALUE, "ein Text mit ß und Hello")),
KEY_LABEL, "Label",
KEY_VALUE, "ein Text mit Hello")),
"tf4", Map.of(
KEY_LABEL, "ä",
KEY_LABEL, "Wichtig",
KEY_VALUE, "Text"),
"ed1", Map.of(
KEY_LABEL,
"\n\tö\n\tü\n",
"\n\tganz\n\tbunt\n",
KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen"
)));
......@@ -223,7 +221,35 @@ class FormDataHtmlCleanerTest {
// Cleans the complete nested fixture and compares it with the expected, tag-free structure.
void shouldCleanHtml() {
	var result = cleaner.clean(FormData.builder().formData(FORM_DATA_MAP).build());

	// A single recursive comparison is enough; it walks the nested maps field by field.
	assertThat(result.getFormData()).usingRecursiveComparison().isEqualTo(EXPECTED_MAP);
}
// Each row: encoded label, encoded value, expected decoded label, expected decoded value.
@DisplayName("should decode german characters")
@ParameterizedTest(name = "{0} => {2}")
@CsvSource({
		"&Auml;, &Auml;-Wert, Ä, Ä-Wert",
		"&Ouml;, &Ouml;-Wert, Ö, Ö-Wert",
		"&Uuml;, &Uuml;-Wert, Ü, Ü-Wert",
		"&szlig;, Label mit &szlig;, ß, Label mit ß"
})
void shouldDecodeUmlaut(String srcLabel, String srcValue, String expectedLabel, String expectedValue) {
	Map<String, Object> encodedField = Map.of(KEY_LABEL, srcLabel, KEY_VALUE, srcValue);
	Map<String, Object> input = Map.of("tf1", encodedField);

	var cleaned = cleaner.cleanFormData(input);

	// The structure must be unchanged; only the character references are decoded.
	assertThat(cleaned).isEqualTo(Map.of("tf1", Map.of(KEY_LABEL, expectedLabel, KEY_VALUE, expectedValue)));
}
// Each row: HTML character reference, expected decoded character.
// The backtick is used as CSV quote character so that ' and " can appear as plain values.
@DisplayName("should decode special characters")
@ParameterizedTest(name = "{0} => {1}")
@CsvSource(value = {
		"&amp;, &",
		// No trailing comma here: it would add a third, empty column to this row.
		"&lt;, <",
		"&gt;, >",
		"&quot;, \"",
		"&#39;, '" }, quoteCharacter = '`')
void shouldDecodeAmpersand(String src, String expected) {
	var result = cleaner.cleanFormData(Map.of(KEY_LABEL, src));

	assertThat(result).isEqualTo(Map.of(KEY_LABEL, expected));
}
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment