From 0a1e5fdc360a19f31dabdba42bfee31e8c484083 Mon Sep 17 00:00:00 2001 From: OZG-Cloud Team <noreply@ozg-sh.de> Date: Mon, 11 Nov 2024 15:01:05 +0100 Subject: [PATCH] OZG-7077 unescape html characters --- .../formcycle/FormDataHtmlCleaner.java | 10 +-- .../formcycle/FormDataHtmlCleanerTest.java | 68 +++++++++++++------ 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java b/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java index 5e5c45a1..20047510 100644 --- a/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java +++ b/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java @@ -31,6 +31,7 @@ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.safety.Safelist; import org.springframework.stereotype.Component; +import org.springframework.web.util.HtmlUtils; import de.ozgcloud.eingang.common.formdata.FormData; @@ -58,12 +59,13 @@ public class FormDataHtmlCleaner { } Object parseHtml(String html) { - var jsoupDocument = Jsoup.parse(html); var outputSettings = new Document.OutputSettings(); // keep new lines outputSettings.prettyPrint(false); - jsoupDocument.outputSettings(outputSettings); - var innerHtml = jsoupDocument.html().replace("\\\\n", "\n"); - return Jsoup.clean(innerHtml, "", Safelist.none(), outputSettings); + var htmlText = Jsoup.clean(modifyInputHtml(html), "", Safelist.none(), outputSettings); + return HtmlUtils.htmlUnescape(htmlText); } + String modifyInputHtml(String html) { + return html.replace("\\\\n", "\n"); + } } diff --git a/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java b/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java index 4f1109c2..bdc3bdf2 100644 --- a/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java +++ b/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java @@ -30,8 +30,11 @@ import java.util.List; import java.util.Map; import org.assertj.core.data.MapEntry; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.mockito.InjectMocks; import org.mockito.Spy; @@ -178,44 +181,39 @@ class FormDataHtmlCleanerTest { static final String KEY_VALUE = "value"; static final Map<String, Object> FORM_DATA_MAP = Map.of("tf1", Map.of( - KEY_LABEL, "<p><em>Ä</em></p>", - KEY_VALUE, "Ä - Wert"), - "tf2", Map.of( - KEY_LABEL, "<p><strong>Ö</strong></p>", - KEY_VALUE, "Ö - Wert"), + KEY_LABEL, "<p><em>Label</em></p>", + KEY_VALUE, "<i>Value</i>"), "fs1", Map.of( - KEY_LABEL, "Ü", + KEY_LABEL, "Key", KEY_VALUE, Map.of( "tf3", Map.of( - KEY_LABEL, " <p><s>Label mit</s> ß</p>", - KEY_VALUE, "ein Text mit ß und <html><body><h1>Hello</h1><body><html>")), + KEY_LABEL, "<p><s>Label</s></p>", + KEY_VALUE, "ein Text mit <html><body><h1>Hello</h1><body><html>")), "tf4", Map.of( - KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">ä</span></p>", + KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">Wichtig</span></p>", KEY_VALUE, "Text"), "ed1", Map.of( KEY_LABEL, - "<ol>\n\t<li><em><strong><u>ö</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;\">ü</span></li>\n</ol>", + "<ol>\n\t<li><em><strong><u>ganz</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;" + + "\">bunt</span></li>\n</ol>", KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen" ))); static final Map<String, Object> EXPECTED_MAP = Map.of("tf1", Map.of( - KEY_LABEL, "Ä", - KEY_VALUE, "Ä - Wert"), - "tf2", Map.of( - KEY_LABEL, "Ö", - KEY_VALUE, "Ö - Wert"), + KEY_LABEL, "Label", + KEY_VALUE, "Value"), "fs1", Map.of( - KEY_LABEL, "Ü", + KEY_LABEL, "Key", KEY_VALUE, Map.of( "tf3", Map.of( - KEY_LABEL, "Label mit ß", - KEY_VALUE, "ein Text mit ß und Hello")), + KEY_LABEL, "Label", + KEY_VALUE, "ein Text mit Hello")), "tf4", Map.of( - KEY_LABEL, "ä", + KEY_LABEL, "Wichtig", KEY_VALUE, "Text"), "ed1", Map.of( KEY_LABEL, - "\n\tö\n\tü\n", + "\n\tganz\n\tbunt\n", KEY_VALUE, "TExt\nmit\n Leerzeichen\nund\n Umbrüchen" ))); @@ -223,7 +221,35 @@ class FormDataHtmlCleanerTest { void shouldCleanHtml() { var result = cleaner.clean(FormData.builder().formData(FORM_DATA_MAP).build()); - assertThat(result.getFormData()).isEqualTo(EXPECTED_MAP); + assertThat(result.getFormData()).usingRecursiveComparison().isEqualTo(EXPECTED_MAP); + } + + @DisplayName("should decode german characters") + @ParameterizedTest(name = "{0} => {2}") + @CsvSource({ "Ä, Ä-Wert, Ä, Ä-Wert", + "Ö, Ö-Wert, Ö, Ö-Wert", + "Ü, Ü-Wert, Ü, Ü-Wert", + "ß, Label mit ß, ß, Label mit ß" }) + void shouldDecodeUmlaut(String srcLabel, String srcValue, String expectedLabel, String expectedValue) { + var sourceMap = Map.<String, Object>of("tf1", Map.of(KEY_LABEL, srcLabel, KEY_VALUE, srcValue)); + var expectedMap = Map.of("tf1", Map.of(KEY_LABEL, expectedLabel, KEY_VALUE, expectedValue)); + + var result = cleaner.cleanFormData(sourceMap); + + assertThat(result).isEqualTo(expectedMap); + } + + @DisplayName("should decode special characters") + @ParameterizedTest(name = "{0} => {1}") + @CsvSource(value = { "&, &", + "<, <,", + ">, >", + "", \"", + "', '" }, quoteCharacter = '`') + void shouldDecodeAmpersand(String src, String expected) { + var result = cleaner.cleanFormData(Map.of(KEY_LABEL, src)); + + assertThat(result).isEqualTo(Map.of(KEY_LABEL, expected)); } } } \ No newline at end of file -- GitLab