From 0a1e5fdc360a19f31dabdba42bfee31e8c484083 Mon Sep 17 00:00:00 2001
From: OZG-Cloud Team <noreply@ozg-sh.de>
Date: Mon, 11 Nov 2024 15:01:05 +0100
Subject: [PATCH] OZG-7077 unescape html characters

---
 .../formcycle/FormDataHtmlCleaner.java        | 10 +--
 .../formcycle/FormDataHtmlCleanerTest.java    | 68 +++++++++++++------
 2 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java b/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java
index 5e5c45a1..20047510 100644
--- a/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java
+++ b/formcycle-adapter/formcycle-adapter-impl/src/main/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleaner.java
@@ -31,6 +31,7 @@ import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.safety.Safelist;
 import org.springframework.stereotype.Component;
+import org.springframework.web.util.HtmlUtils;
 
 import de.ozgcloud.eingang.common.formdata.FormData;
 
@@ -58,12 +59,13 @@ public class FormDataHtmlCleaner {
 	}
 
 	Object parseHtml(String html) {
-		var jsoupDocument = Jsoup.parse(html);
 		var outputSettings = new Document.OutputSettings(); // keep new lines
 		outputSettings.prettyPrint(false);
-		jsoupDocument.outputSettings(outputSettings);
-		var innerHtml = jsoupDocument.html().replace("\\\\n", "\n");
-		return Jsoup.clean(innerHtml, "", Safelist.none(), outputSettings);
+		var htmlText = Jsoup.clean(modifyInputHtml(html), "", Safelist.none(), outputSettings); // Safelist.none() permits no tags, only the text survives
+		return HtmlUtils.htmlUnescape(htmlText); // turn entities (e.g. &Auml;, &amp;) back into the characters they stand for
 	}
 
+	String modifyInputHtml(String html) {
+		return html.replace("\\\\n", "\n"); // two backslashes followed by 'n' in the input become one real line feed
+	}
 }
diff --git a/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java b/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java
index 4f1109c2..bdc3bdf2 100644
--- a/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java
+++ b/formcycle-adapter/formcycle-adapter-impl/src/test/java/de/ozgcloud/eingang/formcycle/FormDataHtmlCleanerTest.java
@@ -30,8 +30,11 @@ import java.util.List;
 import java.util.Map;
 
 import org.assertj.core.data.MapEntry;
+import org.junit.jupiter.api.DisplayName;
 import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 import org.mockito.InjectMocks;
 import org.mockito.Spy;
 
@@ -178,44 +181,39 @@ class FormDataHtmlCleanerTest {
 		static final String KEY_VALUE = "value";
 
 		static final Map<String, Object> FORM_DATA_MAP = Map.of("tf1", Map.of(
-						KEY_LABEL, "<p><em>&Auml;</em></p>",
-						KEY_VALUE, "Ä - Wert"),
-				"tf2", Map.of(
-						KEY_LABEL, "<p><strong>&Ouml;</strong></p>",
-						KEY_VALUE, "Ö - Wert"),
+						KEY_LABEL, "<p><em>Label</em></p>", // label wrapped in nested tags
+						KEY_VALUE, "<i>Value</i>"), // the value carries markup as well
 				"fs1", Map.of(
-						KEY_LABEL, "Ü",
+						KEY_LABEL, "Key", // plain text without any markup
 						KEY_VALUE, Map.of(
 								"tf3", Map.of(
-										KEY_LABEL, " <p><s>Label mit</s> &szlig;</p>",
-										KEY_VALUE, "ein Text mit ß und <html><body><h1>Hello</h1><body><html>")),
+										KEY_LABEL, "<p><s>Label</s></p>",
+										KEY_VALUE, "ein Text mit <html><body><h1>Hello</h1><body><html>")), // malformed fragment: the trailing body/html tags are never closed
 						"tf4", Map.of(
-								KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">&auml;</span></p>",
+								KEY_LABEL, "<p><span style=\"background-color:#1abc9c;\">Wichtig</span></p>", // tag with a style attribute
 								KEY_VALUE, "Text"),
 						"ed1", Map.of(
 								KEY_LABEL,
-								"<ol>\n\t<li><em><strong><u>&ouml;</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;\">&uuml;</span></li>\n</ol>",
+								"<ol>\n\t<li><em><strong><u>ganz</u></strong></em></li>\n\t<li><span style=\"color:#e74c3c;" // list markup with line feeds and tabs between the items
+										+ "\">bunt</span></li>\n</ol>", // second half of the same label string
 								KEY_VALUE, "TExt\nmit\n    Leerzeichen\nund\n  Umbrüchen"
 						)));
 
 		static final Map<String, Object> EXPECTED_MAP = Map.of("tf1", Map.of(
-						KEY_LABEL, "Ä",
-						KEY_VALUE, "Ä - Wert"),
-				"tf2", Map.of(
-						KEY_LABEL, "Ö",
-						KEY_VALUE, "Ö - Wert"),
+						KEY_LABEL, "Label", // only the text content of the label remains
+						KEY_VALUE, "Value"), // values are expected without their tags as well
 				"fs1", Map.of(
-						KEY_LABEL, "Ü",
+						KEY_LABEL, "Key", // plain text is expected unchanged
 						KEY_VALUE, Map.of(
 								"tf3", Map.of(
-										KEY_LABEL, "Label mit ß",
-										KEY_VALUE, "ein Text mit ß und Hello")),
+										KEY_LABEL, "Label",
+										KEY_VALUE, "ein Text mit Hello")), // text of the embedded fragment is kept, its tags are not
 						"tf4", Map.of(
-								KEY_LABEL, "ä",
+								KEY_LABEL, "Wichtig", // style attribute and surrounding tags are gone
 								KEY_VALUE, "Text"),
 						"ed1", Map.of(
 								KEY_LABEL,
-								"\n\tö\n\tü\n",
+								"\n\tganz\n\tbunt\n", // line feeds and tabs between the list items are expected to survive
 								KEY_VALUE, "TExt\nmit\n    Leerzeichen\nund\n  Umbrüchen"
 						)));
 
@@ -223,7 +221,35 @@ class FormDataHtmlCleanerTest {
 		void shouldCleanHtml() {
 			var result = cleaner.clean(FormData.builder().formData(FORM_DATA_MAP).build());
 
-			assertThat(result.getFormData()).isEqualTo(EXPECTED_MAP);
+			assertThat(result.getFormData()).usingRecursiveComparison().isEqualTo(EXPECTED_MAP); // AssertJ recursive comparison of the nested maps
+		}
+
+		@DisplayName("should decode german characters")
+		@ParameterizedTest(name = "{0} => {2}")
+		@CsvSource({
+				"&Auml;, &Auml;-Wert, Ä, Ä-Wert",
+				"&Ouml;, &Ouml;-Wert, Ö, Ö-Wert",
+				"&Uuml;, &Uuml;-Wert, Ü, Ü-Wert",
+				"&szlig;, Label mit &szlig;, ß, Label mit ß" })
+		void shouldDecodeUmlaut(String srcLabel, String srcValue, String expectedLabel, String expectedValue) {
+			// Label and value of the single field both carry the entity under test.
+			Map<String, Object> encoded = Map.of("tf1", Map.of(KEY_LABEL, srcLabel, KEY_VALUE, srcValue));
+			Map<String, Object> decoded = Map.of("tf1", Map.of(KEY_LABEL, expectedLabel, KEY_VALUE, expectedValue));
+
+			assertThat(cleaner.cleanFormData(encoded)).isEqualTo(decoded);
+		}
+
+		// Entities of HTML-significant characters must come out as the plain character.
+		@DisplayName("should decode special characters")
+		@ParameterizedTest(name = "{0} => {1}")
+		@CsvSource(value = { "&amp;, &",
+				"&lt;, <",
+				"&gt;, >",
+				"&quot;, \"",
+				"&#39;, '" }, quoteCharacter = '`') // backtick as quote character so the single quote in the last row is read as data
+		void shouldDecodeAmpersand(String src, String expected) {
+			var result = cleaner.cleanFormData(Map.of(KEY_LABEL, src));
+			assertThat(result).isEqualTo(Map.of(KEY_LABEL, expected));
 		}
 	}
 }
\ No newline at end of file
-- 
GitLab