def is_valid(resource, file):
    """Check whether the file on disk looks like a DOCX document.

    The file is inspected by path (``file.name``); the passed handle itself
    is never read, so its mode and position do not matter.

    Args:
        resource: Mutable mapping describing the resource. When validation
            fails, a human-readable reason is stored under
            ``resource["error"]``. On success the mapping is not modified.
        file: Object exposing a ``name`` attribute holding the path of the
            file to check.

    Returns:
        True if the file is a readable ZIP archive that contains both
        ``word/document.xml`` and ``word/styles.xml``; False otherwise.
    """
    if not zipfile.is_zipfile(file.name):
        resource["error"] = "Not a ZIP file."
        return False

    try:
        with zipfile.ZipFile(file.name, "r") as archive:
            members = set(archive.namelist())
    except zipfile.BadZipFile as exc:
        # is_zipfile() can accept archives whose central directory is
        # damaged or truncated; opening them raises BadZipFile. Report that
        # as a validation failure instead of letting the validator crash.
        resource["error"] = f"Corrupt ZIP file: {exc}"
        return False

    required_members = ("word/document.xml", "word/styles.xml")
    if any(member not in members for member in required_members):
        resource["error"] = "That does not look like an DOCX file."
        return False

    return True
def is_valid(resource, file):
    """Check whether the file on disk looks like an XLSX workbook.

    The file is inspected by path (``file.name``); the passed handle itself
    is never read, so its mode and position do not matter.

    Args:
        resource: Mutable mapping describing the resource. When validation
            fails, a human-readable reason is stored under
            ``resource["error"]``. On success the mapping is not modified.
        file: Object exposing a ``name`` attribute holding the path of the
            file to check.

    Returns:
        True if the file is a readable ZIP archive that contains both
        ``xl/workbook.xml`` and ``xl/styles.xml``; False otherwise.
    """
    if not zipfile.is_zipfile(file.name):
        resource["error"] = "Not a ZIP file."
        return False

    try:
        with zipfile.ZipFile(file.name, "r") as archive:
            members = set(archive.namelist())
    except zipfile.BadZipFile as exc:
        # is_zipfile() can accept archives whose central directory is
        # damaged or truncated; opening them raises BadZipFile. Report that
        # as a validation failure instead of letting the validator crash.
        resource["error"] = f"Corrupt ZIP file: {exc}"
        return False

    required_members = ("xl/workbook.xml", "xl/styles.xml")
    if any(member not in members for member in required_members):
        resource["error"] = "That does not look like an XLSX file."
        return False

    return True
<inspire_dls:spatial_dataset_identifier_code>21f67269-780f-4f3c-8f66-03dde27acfe7</inspire_dls:spatial_dataset_identifier_code> + <inspire_dls:spatial_dataset_identifier_namespace>http://registry.gdi.de.org/id/de.sh/</inspire_dls:spatial_dataset_identifier_namespace> + <link rel="describedby" href="http://sh-mis.schleswig-holstein.de/soapServices/CSWStartup?service=CSW&version=2.0.2&request=GetRecordById&outputschema=http://www.isotc211.org/2005/gmd&elementsetname=full&ID=21f67269-780f-4f3c-8f66-03dde27acfe7" type="application/xml"/> + <link rel="alternate" href="https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_21f67269-780f-4f3c-8f66-03dde27acfe7.xml" type="application/atom+xml" hreflang="ger" title="Feldblöcke SH 2024"/> + <id>https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_21f67269-780f-4f3c-8f66-03dde27acfe7.xml</id> + <updated>2023-08-22T12:00:00+01:00</updated> + <summary>Feldblöcke in Schleswig-Holstein für den Stichtag 01.01.2024.</summary> + <georss:polygon>53.35 7.86 55.05 7.86 55.05 11.35 53.35 11.35 53.35 7.86</georss:polygon> + <category term="http://www.opengis.net/def/crs/EPSG/0/4258" label="ETRS89"/> + </entry> + <entry> + <title>Landschaftselemente SH 2024</title> + <inspire_dls:spatial_dataset_identifier_code>c4ddc5b7-036c-4670-b5ed-445d1aa20a2a</inspire_dls:spatial_dataset_identifier_code> + <inspire_dls:spatial_dataset_identifier_namespace>http://registry.gdi.de.org/id/de.sh/</inspire_dls:spatial_dataset_identifier_namespace> + <link rel="describedby" href="http://sh-mis.schleswig-holstein.de/soapServices/CSWStartup?service=CSW&version=2.0.2&request=GetRecordById&outputschema=http://www.isotc211.org/2005/gmd&elementsetname=full&ID=c4ddc5b7-036c-4670-b5ed-445d1aa20a2a" type="application/xml"/> + <link rel="alternate" 
href="https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_c4ddc5b7-036c-4670-b5ed-445d1aa20a2a.xml" type="application/atom+xml" hreflang="ger" title="Landschaftselemente SH 2024"/> + <id>https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_c4ddc5b7-036c-4670-b5ed-445d1aa20a2a.xml</id> + <updated>2023-08-22T12:00:00+01:00</updated> + <summary>Landschaftselemente in Schleswig Holstein zum Stichtag 01.01.2024.</summary> + <georss:polygon>53.35 7.86 55.05 7.86 55.05 11.35 53.35 11.35 53.35 7.86</georss:polygon> + <category term="http://www.opengis.net/def/crs/EPSG/0/4258" label="ETRS89"/> + </entry> + <entry> + <title>Feuchtgebiete und Moore ab 2 ha (GLÖZ 2) SH</title> + <inspire_dls:spatial_dataset_identifier_code>176bcfa1-7af2-4bd5-b4e3-2a6212b8fcd2</inspire_dls:spatial_dataset_identifier_code> + <inspire_dls:spatial_dataset_identifier_namespace>http://registry.gdi.de.org/id/de.sh/</inspire_dls:spatial_dataset_identifier_namespace> + <link rel="describedby" href="http://sh-mis.schleswig-holstein.de/soapServices/CSWStartup?service=CSW&version=2.0.2&request=GetRecordById&outputschema=http://www.isotc211.org/2005/gmd&elementsetname=full&ID=176bcfa1-7af2-4bd5-b4e3-2a6212b8fcd2" type="application/xml"/> + <link rel="alternate" href="https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_176bcfa1-7af2-4bd5-b4e3-2a6212b8fcd2.xml" type="application/atom+xml" hreflang="ger" title="Feuchtgebiete und Moore ab 2 ha (GLÖZ 2) SH"/> + <id>https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_176bcfa1-7af2-4bd5-b4e3-2a6212b8fcd2.xml</id> + <updated>2023-08-18T12:00:00+01:00</updated> + <summary>Feuchtgebiete und Moore ab 2 ha (GLÖZ 2) in Schleswig-Holstein.</summary> + <georss:polygon>11.35 7.86 55.05 7.86 55.05 53.35 11.35 53.35 11.35 7.86</georss:polygon> + <category 
term="http://www.opengis.net/def/crs/EPSG/0/4258" label="ETRS89"/> + </entry> + <entry> + <title>Nitrat-belastete Gebiete nach LDüV in SH</title> + <inspire_dls:spatial_dataset_identifier_code>694bd2d9-3bea-4818-961c-25024acc0588</inspire_dls:spatial_dataset_identifier_code> + <inspire_dls:spatial_dataset_identifier_namespace>http://registry.gdi.de.org/id/de.sh/</inspire_dls:spatial_dataset_identifier_namespace> + <link rel="describedby" href="http://sh-mis.schleswig-holstein.de/soapServices/CSWStartup?service=CSW&version=2.0.2&request=GetRecordById&outputschema=http://www.isotc211.org/2005/gmd&elementsetname=full&ID=694bd2d9-3bea-4818-961c-25024acc0588" type="application/xml"/> + <link rel="alternate" href="https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_694bd2d9-3bea-4818-961c-25024acc0588.xml" type="application/atom+xml" hreflang="ger" title="Nitrat-belastete Gebiete nach LDüV in SH"/> + <id>https://service.gdi-sh.de/SH_OpenGBD/feeds/Atom_SH_Feldblockfinder_OpenGBD/Atom_SH_Feldblockfinder_OpenGBD_694bd2d9-3bea-4818-961c-25024acc0588.xml</id> + <updated>2023-08-18T12:00:00+01:00</updated> + <summary>Nitrat-belastete Gebiete für das Jahr 2022 nach LDüV in SH.</summary> + <georss:polygon>11.35 7.86 55.05 7.86 55.05 53.35 11.35 53.35 11.35 7.86</georss:polygon> + <category term="http://www.opengis.net/def/crs/EPSG/0/4258" label="ETRS89"/> + </entry> +</feed> diff --git a/tests/test_atom_format.py b/tests/test_atom_format.py new file mode 100644 index 0000000000000000000000000000000000000000..902b3339471f32dfc620d15a6402b903704b6517 --- /dev/null +++ b/tests/test_atom_format.py @@ -0,0 +1,27 @@ +import unittest +from formats.atom_format import is_valid + + +class TestAtomFormat(unittest.TestCase): + def test_is_valid__valid(self): + resource = {} + with open("tests/data/Atom_SH_Feldblockfinder_OpenGBD.xml", "r") as file: + self.assertTrue(is_valid(resource, file)) + self.assertIsNone(resource.get("error")) + 
class TestDocxFormat(unittest.TestCase):
    """Run the DOCX validator against fixture files under tests/data."""

    def _validate(self, path):
        """Open *path*, run ``is_valid`` on it, return (verdict, error)."""
        outcome = {}
        with open(path, "r") as handle:
            verdict = is_valid(outcome, handle)
        return verdict, outcome.get("error")

    def test_is_valid__valid(self):
        # A genuine DOCX fixture must pass without recording an error.
        verdict, error = self._validate("tests/data/valid.docx")
        self.assertTrue(verdict)
        self.assertIsNone(error)

    def test_is_valid__invalid_no_zip(self):
        # A JSON file is not a ZIP container at all.
        verdict, error = self._validate("tests/data/correct.json")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)

    def test_is_valid__invalid_no_excel(self):
        # An XLSX fixture is a ZIP container, but must still be rejected.
        verdict, error = self._validate("tests/data/valid.xlsx")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)

    def test_is_valid__invalid_zip(self):
        # An ODT fixture is likewise a ZIP container that must be rejected.
        verdict, error = self._validate("tests/data/valid.odt")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)
class TestXlsxFormat(unittest.TestCase):
    """Run the XLSX validator against fixture files under tests/data."""

    def _validate(self, path):
        """Open *path*, run ``is_valid`` on it, return (verdict, error)."""
        outcome = {}
        with open(path, "r") as handle:
            verdict = is_valid(outcome, handle)
        return verdict, outcome.get("error")

    def test_is_valid__valid(self):
        # A genuine XLSX fixture must pass without recording an error.
        verdict, error = self._validate("tests/data/valid.xlsx")
        self.assertTrue(verdict)
        self.assertIsNone(error)

    def test_is_valid__invalid_no_zip(self):
        # A JSON file is not a ZIP container at all.
        verdict, error = self._validate("tests/data/correct.json")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)

    def test_is_valid__invalid_no_excel(self):
        # A DOCX fixture is a ZIP container, but must still be rejected.
        verdict, error = self._validate("tests/data/valid.docx")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)

    def test_is_valid__invalid_zip(self):
        # An ODS fixture is likewise a ZIP container that must be rejected.
        verdict, error = self._validate("tests/data/valid.ods")
        self.assertFalse(verdict)
        self.assertIsNotNone(error)