Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • opendata/dcat-catalog-check
1 result
Select Git revision
Show changes
Showing
with 508 additions and 30 deletions
File added
File added
File added
import unittest
import importlib
import pkgutil
import tempfile
class TestAllFormats(unittest.TestCase):
    """Smoke test that imports and calls every module of the ``formats`` package."""

    def test_load_all_modules(self):
        """Make sure that every format module has been loaded at least once.

        Otherwise, the code coverage will not know about the file.

        Each module runs in its own sub-test, so a failure in one format
        module is reported without preventing the remaining modules from
        being imported and exercised.
        """
        package = importlib.import_module("formats")
        module_names = [info.name for info in pkgutil.iter_modules(package.__path__)]

        for module in module_names:
            with self.subTest(module=module):
                format_check_module = importlib.import_module("formats." + module)
                # The checker only has to run without raising; the temporary
                # file is empty, so the validation verdict itself is ignored.
                with tempfile.NamedTemporaryFile(delete=True) as temp_file:
                    resource = {"url": "https://test.invalid/data"}
                    try:
                        format_check_module.is_valid(resource, temp_file)
                    except Exception:
                        print(f"Module for format {module} failed.")
                        # Bare ``raise`` re-raises with the original traceback.
                        raise
if __name__ == "__main__":
unittest.main()
import unittest
from formats.atom_format import is_valid
class TestAtomFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the Atom format checker."""

    def _validate(self, fixture_path):
        """Run ``is_valid`` on a fixture and return ``(verdict, resource)``."""
        metadata = {}
        with open(fixture_path, "r") as handle:
            verdict = is_valid(metadata, handle)
        return verdict, metadata

    def test_is_valid__valid(self):
        """The Atom feed fixture is accepted and no error is recorded."""
        verdict, metadata = self._validate(
            "tests/data/Atom_SH_Feldblockfinder_OpenGBD.xml"
        )
        self.assertTrue(verdict)
        self.assertIsNone(metadata.get("error"))

    def test_is_valid__other_xml(self):
        """The generic XML fixture is rejected with an error."""
        verdict, metadata = self._validate("tests/data/correct.xml")
        self.assertFalse(verdict)
        self.assertIsNotNone(metadata.get("error"))

    def test_is_valid__invalid_xml(self):
        """The ``incorrect.xml`` fixture is rejected with an error."""
        verdict, metadata = self._validate("tests/data/incorrect.xml")
        self.assertFalse(verdict)
        self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
unittest.main()
......@@ -7,14 +7,19 @@ from dcat_catalog_check import (
)
from rdflib import Graph
from rdflib.namespace import RDF, DCAT
from http.server import SimpleHTTPRequestHandler, HTTPServer
import threading
import time
import io
import requests
import sys
class TestDcatCatalogCheck(unittest.TestCase):
def setUp(self):
    """Create the checker under test and replace its logger with a mock.

    The patch is started here and kept on ``self.logger_patch`` so that it
    can be stopped again after the test.
    """
    # ".invalid" is a reserved TLD (RFC 2606), so the base URL never points
    # at a real host.
    self.dcc = DcatCatalogCheck("http://test.invalid:8000/", "my_api_key")

    # Mock the logger to capture log messages
    self.logger_patch = patch.object(self.dcc, "logger", MagicMock())
    self.mock_logger = self.logger_patch.start()
def tearDown(self):
......@@ -30,13 +35,10 @@ class TestDcatCatalogCheck(unittest.TestCase):
"XML": ["application/xml"],
}
self.assertTrue(self.dcc.is_mime_type_compatible(
"JSON", "application/json"))
self.assertFalse(self.dcc.is_mime_type_compatible(
"JSON", "application/xml"))
self.assertTrue(self.dcc.is_mime_type_compatible("JSON", "application/json"))
self.assertFalse(self.dcc.is_mime_type_compatible("JSON", "application/xml"))
self.assertFalse(
self.dcc.is_mime_type_compatible(
"UnknownFormat", "application/json")
self.dcc.is_mime_type_compatible("UnknownFormat", "application/json")
)
def test_read_allowed_file_formats(self):
......@@ -48,8 +50,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
):
formats = self.dcc.read_allowed_file_formats()
self.assertEqual(
formats, {"JSON": ["application/json"],
"XML": ["application/xml"]}
formats, {"JSON": ["application/json"], "XML": ["application/xml"]}
)
def test_load_uri_replacements(self):
......@@ -59,10 +60,8 @@ class TestDcatCatalogCheck(unittest.TestCase):
read_data='[{"regex": "old", "replaced_by": "new"}]'
),
):
replacements = self.dcc.load_uri_replacements()
self.assertEqual(
replacements, [{"regex": "old", "replaced_by": "new"}])
self.assertEqual(replacements, [{"regex": "old", "replaced_by": "new"}])
# Simulate that the file does not exist
......@@ -111,7 +110,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -128,7 +127,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -146,7 +145,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -164,7 +163,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -182,7 +181,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -198,7 +197,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "XML"
self.dcc.check_resource(resource)
self.assertEqual(resource["accessible"], True)
......@@ -214,7 +213,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "PNG"
resource["checksum_algorithm"] = (
"http://spdx.org/rdf/terms#checksumAlgorithm_sha1"
......@@ -247,7 +246,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource.get("accessible"), True)
......@@ -266,7 +265,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource.get("accessible"), True)
......@@ -285,7 +284,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/data"
resource["url"] = "http://test.invalid/data"
resource["format"] = "JSON"
self.dcc.check_resource(resource)
self.assertEqual(resource.get("accessible"), True)
......@@ -312,7 +311,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.dcc.load_http_complete = MagicMock(return_value=mock_response)
resource = {}
resource["url"] = "http://localhost/zos116.zip"
resource["url"] = "http://test.invalid/zos116.zip"
resource["format"] = "SHP"
self.dcc.check_resource(resource)
......@@ -326,7 +325,7 @@ class TestDcatCatalogCheck(unittest.TestCase):
# Test data to simulate the contents of previous_results.json
test_data = [
{"url": "http://example.com", "status": "valid", "format": "JSON"},
{"url": "http://example.org", "status": "invalid", "format": "XML"}
{"url": "http://example.org", "status": "invalid", "format": "XML"},
]
# Write test data to a file 'previous_results.json'
......@@ -342,9 +341,11 @@ class TestDcatCatalogCheck(unittest.TestCase):
self.assertIn("http://example.com", self.dcc.previous_results)
self.assertIn("http://example.org", self.dcc.previous_results)
self.assertEqual(
self.dcc.previous_results["http://example.com"]["status"], "valid")
self.dcc.previous_results["http://example.com"]["status"], "valid"
)
self.assertEqual(
self.dcc.previous_results["http://example.org"]["status"], "invalid")
self.dcc.previous_results["http://example.org"]["status"], "invalid"
)
@patch("os.path.exists", return_value=False)
def test_read_previous_results_file_not_exist(self, mock_exists):
......@@ -365,7 +366,12 @@ class TestDcatCatalogCheck(unittest.TestCase):
"Invalid JSON at line 1: Expecting value: line 1 column 1 (char 0)"
)
@patch("builtins.open", mock_open(read_data='{"status": "valid", "format": "JSON"}\n{"url": "http://example.com", "status": "valid", "format": "JSON"}'))
@patch(
"builtins.open",
mock_open(
read_data='{"status": "valid", "format": "JSON"}\n{"url": "http://example.com", "status": "valid", "format": "JSON"}'
),
)
@patch("os.path.exists", return_value=True)
def test_read_previous_results_missing_url(self, mock_exists):
"""Test when the file has a line with missing 'url'."""
......@@ -375,6 +381,117 @@ class TestDcatCatalogCheck(unittest.TestCase):
'Line 1 is missing \'url\': {"status": "valid", "format": "JSON"}'
)
def test_apply_uri_replacements(self):
    """Test the apply_uri_replacements method."""
    # Two rules: one rewrites the host, the other upgrades the scheme.
    self.dcc.uri_replacements = [
        {"regex": r"example\.com", "replaced_by": "test.com"},
        {"regex": r"http://", "replaced_by": "https://"},
    ]

    # (input URL, expected URL) pairs, checked in this order.
    expectations = (
        # URL matching both replacements
        ("http://example.com/path", "https://test.com/path"),
        # URL matching only one replacement
        ("http://other.com/path", "https://other.com/path"),
        # URL with no matches
        ("https://unchanged.com/path", "https://unchanged.com/path"),
        # Empty URL
        ("", ""),
    )
    for given, expected in expectations:
        self.assertEqual(self.dcc.apply_uri_replacements(given), expected)

    # No URI replacements defined: the URL must come back untouched.
    self.dcc.uri_replacements = []
    self.assertEqual(
        self.dcc.apply_uri_replacements("http://example.com/path"),
        "http://example.com/path",
    )
def test_clear_result(self):
    """Test the _clear_result method."""
    # Fields that _clear_result is expected to remove.
    result_fields = {
        "accessible": True,
        "checksum_ok": True,
        "duration": 1.23,
        "error": "Some error",
        "etag": "some-etag",
        "http_status": 200,
        "last_check": "2024-12-27T12:34:56Z",
        "mimetype": "application/json",
        "mimetype_mismatch": False,
        "valid": True,
    }
    # Fields that must remain untouched.
    descriptive_fields = {
        "url": "http://example.com/data",
        "format": "JSON",
    }
    resource = {**result_fields, **descriptive_fields}

    self.dcc._clear_result(resource)

    # Every result field has been removed ...
    for cleared_key in result_fields:
        self.assertNotIn(cleared_key, resource)

    # ... while the unrelated keys are still present with their old values.
    for kept_key in descriptive_fields:
        self.assertIn(kept_key, resource)
    for kept_key, kept_value in descriptive_fields.items():
        self.assertEqual(resource[kept_key], kept_value)
def _wait_for_server(self, url, timeout=10, interval=0.2):
    """Wait until the server can be reached at the specified URL.

    Polls ``url`` with HTTP GET until a response with status 200 arrives
    or the time budget is used up.

    Args:
        url: URL to poll.
        timeout: Overall time budget in seconds.
        interval: Pause in seconds between two attempts.

    Returns:
        True as soon as a 200 response was received, False if the budget
        ran out first.
    """
    # A monotonic clock is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            # Bound each attempt by the remaining budget so that a hanging
            # connection cannot block longer than ``timeout`` overall.
            response = requests.get(url, timeout=remaining)
            if response.status_code == 200:
                return True
        except requests.exceptions.RequestException:
            # Server not up yet (or attempt timed out); retry after a pause.
            pass
        time.sleep(interval)
    print(f"Timeout reached: Server at {url} not reachable.")
    return False
def _run_server(self):
    """Start an HTTP server on port 8000 and serve requests forever.

    The call blocks, so it is meant to be used as a thread target.
    """
    listen_on = ("", 8000)
    HTTPServer(listen_on, SimpleHTTPRequestHandler).serve_forever()
def test_read_dcat_catalog(self):
    """Read a catalog from a local HTTP server and count the emitted results.

    A daemon thread serves the test data on port 8000. Everything that
    ``read_dcat_catalog`` writes to stdout is captured and each line is
    parsed as one JSON object.
    """
    server_thread = threading.Thread(target=self._run_server, daemon=True)
    server_thread.start()
    # Fail early with a clear message instead of a confusing follow-up error.
    self.assertTrue(
        self._wait_for_server("http://localhost:8000"),
        "Local test server did not become reachable.",
    )

    mock_stdout = io.StringIO()
    # Remember the current stream: a test runner may have installed its own
    # stdout, which must be restored rather than sys.__stdout__.
    previous_stdout = sys.stdout
    sys.stdout = mock_stdout
    try:
        self.dcc.read_dcat_catalog("http://localhost:8000/tests/data/all-tests.ttl")
    finally:
        # Restore stdout even if reading the catalog raises.
        sys.stdout = previous_stdout

    output = mock_stdout.getvalue()
    json_objects = [json.loads(line) for line in output.splitlines()]
    self.assertEqual(len(json_objects), 31)
if __name__ == "__main__":
unittest.main()
import unittest
from formats.docx_format import is_valid
class TestDocxFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the DOCX format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode non-text content.
    """

    def test_is_valid__valid(self):
        """The DOCX fixture is accepted and no error is recorded."""
        resource = {}
        with open("tests/data/valid.docx", "rb") as file:
            self.assertTrue(is_valid(resource, file))
            self.assertIsNone(resource.get("error"))

    def test_is_valid__invalid_no_zip(self):
        """A JSON fixture is rejected with an error."""
        resource = {}
        with open("tests/data/correct.json", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_no_excel(self):
        """An XLSX fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.xlsx", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_zip(self):
        """An ODT fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.odt", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
if __name__ == "__main__":
unittest.main()
......@@ -2,16 +2,18 @@ import unittest
from formats.geojson_format import is_valid
class TestShpFormat(unittest.TestCase):
class TestGeoJsonFormat(unittest.TestCase):
def test_is_valid__valid(self):
    """The GeoJSON fixture is accepted and no error is recorded."""
    metadata = {}
    with open("tests/data/bermuda.geojson", "r") as fixture:
        verdict = is_valid(metadata, fixture)
    self.assertTrue(verdict)
    self.assertIsNone(metadata.get("error"))
def test_is_valid__invalid(self):
    """The plain JSON fixture is rejected with an error."""
    metadata = {}
    with open("tests/data/correct.json", "r") as fixture:
        verdict = is_valid(metadata, fixture)
    self.assertFalse(verdict)
    self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
......
import unittest
from formats.geotiff_format import is_valid
class TestGeotiffFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the GeoTIFF format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode image or archive bytes as text.
    """

    def test_is_valid__valid(self):
        """The GeoTIFF fixture is accepted."""
        resource = {}
        with open("tests/data/valid_geotiff.tif", "rb") as file:
            self.assertTrue(is_valid(resource, file))

    def test_is_valid__zip(self):
        """The ZIP file contains the TIFF image and a TFW world file."""
        resource = {}
        with open("tests/data/geotiff.zip", "rb") as file:
            self.assertTrue(is_valid(resource, file))

    def test_is_valid__invalid(self):
        """The ``valid.tif`` fixture is rejected with "missing transformation"."""
        resource = {}
        with open("tests/data/valid.tif", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
            self.assertEqual("missing transformation", resource["error"])
if __name__ == "__main__":
unittest.main()
......@@ -7,11 +7,13 @@ class TestGmlFormat(unittest.TestCase):
resource = {}
with open("tests/data/bermuda.gml", "r") as file:
self.assertTrue(is_valid(resource, file))
self.assertIsNone(resource.get("error"))
def test_is_valid__invalid(self):
    """The generic XML fixture is rejected with an error."""
    metadata = {}
    with open("tests/data/correct.xml", "r") as fixture:
        verdict = is_valid(metadata, fixture)
    self.assertFalse(verdict)
    self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
......
import unittest
from formats.jpeg_format import is_valid
class TestJpegFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the JPEG format checker.

    Image fixtures are opened in binary mode so that the file object never
    tries to decode image bytes as text.
    """

    def test_is_valid__valid(self):
        """The JPEG fixture is accepted."""
        resource = {}
        with open("tests/data/image.jpeg", "rb") as file:
            self.assertTrue(is_valid(resource, file))

    def test_is_valid__invalid(self):
        """The PNG fixture is rejected."""
        resource = {}
        with open("tests/data/image.png", "rb") as file:
            self.assertFalse(is_valid(resource, file))
if __name__ == "__main__":
unittest.main()
......@@ -15,6 +15,13 @@ class TestJsonFormat(unittest.TestCase):
self.assertFalse(is_valid(resource, file))
self.assertIsNotNone(resource.get("error"))
def test_is_valid__frictionless_valid(self):
    """The ``ufo-resource.json`` fixture is valid and flagged ``schema_valid``."""
    metadata = {}
    with open("tests/data/ufo-resource.json", "r") as fixture:
        verdict = is_valid(metadata, fixture)
    self.assertTrue(verdict)
    self.assertIsNone(metadata.get("error"))
    self.assertTrue(metadata.get("schema_valid"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.ods_format import is_valid
class TestOdsFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the ODS format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode non-text content.
    """

    def test_is_valid__valid(self):
        """The ODS fixture is accepted and no error is recorded."""
        resource = {}
        with open("tests/data/valid.ods", "rb") as file:
            self.assertTrue(is_valid(resource, file))
            self.assertIsNone(resource.get("error"))

    def test_is_valid__invalid_no_zip(self):
        """A JSON fixture is rejected with an error."""
        resource = {}
        with open("tests/data/correct.json", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_no_odt(self):
        """An ODT fixture is rejected and its MIME type is named in the error."""
        resource = {}
        with open("tests/data/valid.odt", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
            self.assertEqual(
                "Incorrect MIME type: application/vnd.oasis.opendocument.text",
                resource["error"],
            )

    def test_is_valid__invalid_zip(self):
        """An XLSX fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.xlsx", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.odt_format import is_valid
class TestOdsFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the ODT format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode non-text content.
    """

    # NOTE(review): the class name looks copy-pasted from the ODS tests;
    # consider renaming it to TestOdtFormat. Kept here so that existing
    # test selectors continue to work.

    def test_is_valid__valid(self):
        """The ODT fixture is accepted and no error is recorded."""
        resource = {}
        with open("tests/data/valid.odt", "rb") as file:
            self.assertTrue(is_valid(resource, file))
            self.assertIsNone(resource.get("error"))

    def test_is_valid__invalid_no_zip(self):
        """A JSON fixture is rejected with an error."""
        resource = {}
        with open("tests/data/correct.json", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_no_odt(self):
        """An ODS fixture is rejected and its MIME type is named in the error."""
        resource = {}
        with open("tests/data/valid.ods", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
            self.assertEqual(
                "Incorrect MIME type: application/vnd.oasis.opendocument.spreadsheet",
                resource["error"],
            )

    def test_is_valid__invalid_zip(self):
        """A DOCX fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.docx", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.pdf_format import is_valid
class TestPdfFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the PDF format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode non-text content.
    """

    def test_is_valid__valid(self):
        """The PDF fixture is accepted."""
        resource = {}
        with open("tests/data/valid.pdf", "rb") as file:
            self.assertTrue(is_valid(resource, file))

    def test_is_valid__invalid(self):
        """The PNG fixture is rejected."""
        resource = {}
        with open("tests/data/image.png", "rb") as file:
            self.assertFalse(is_valid(resource, file))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.rdf_format import is_valid
class TestRdfFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the RDF format checker."""

    def _validate(self, fixture_path):
        """Run ``is_valid`` on a fixture and return ``(verdict, resource)``."""
        metadata = {}
        with open(fixture_path, "r") as handle:
            verdict = is_valid(metadata, handle)
        return verdict, metadata

    def _assert_accepted(self, fixture_path):
        """Assert that the fixture is valid and that no error was recorded."""
        verdict, metadata = self._validate(fixture_path)
        self.assertTrue(verdict)
        self.assertIsNone(metadata.get("error"))

    def test_is_valid__valid_turtle(self):
        """The ``ufo.ttl`` fixture is accepted."""
        self._assert_accepted("tests/data/ufo.ttl")

    def test_is_valid__valid_xml(self):
        """The ``rdf.xml`` fixture is accepted."""
        self._assert_accepted("tests/data/rdf.xml")

    def test_is_valid__valid_jsonld(self):
        """The ``rdf.json`` fixture is accepted."""
        self._assert_accepted("tests/data/rdf.json")

    def test_is_valid__invalid(self):
        """The plain JSON fixture is rejected with an error."""
        verdict, metadata = self._validate("tests/data/correct.json")
        self.assertFalse(verdict)
        self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.wmts_srvc_format import is_valid
class TestWmtsSrvcFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the WMTS service format checker."""

    def _validate(self, fixture_path):
        """Run ``is_valid`` with the service URL set; return ``(verdict, resource)``."""
        metadata = {
            "url": "https://dienste.gdi-sh.invalid/WMTS_SH_ALKIS_OpenGBD/wmts/1.0.0/WMTSCapabilities.xml"
        }
        with open(fixture_path, "r") as handle:
            verdict = is_valid(metadata, handle)
        return verdict, metadata

    def test_is_valid__valid(self):
        """The WMTS capabilities fixture is accepted and no error is recorded."""
        verdict, metadata = self._validate("tests/data/WMTSCapabilities.xml")
        self.assertTrue(verdict)
        self.assertIsNone(metadata.get("error"))

    def test_is_valid__invalid(self):
        """The generic XML fixture is rejected with an error."""
        verdict, metadata = self._validate("tests/data/correct.xml")
        self.assertFalse(verdict)
        self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.xlsx_format import is_valid
class TestXlsxFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the XLSX format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode non-text content.
    """

    def test_is_valid__valid(self):
        """The XLSX fixture is accepted and no error is recorded."""
        resource = {}
        with open("tests/data/valid.xlsx", "rb") as file:
            self.assertTrue(is_valid(resource, file))
            self.assertIsNone(resource.get("error"))

    def test_is_valid__invalid_no_zip(self):
        """A JSON fixture is rejected with an error."""
        resource = {}
        with open("tests/data/correct.json", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_no_excel(self):
        """A DOCX fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.docx", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))

    def test_is_valid__invalid_zip(self):
        """An ODS fixture is rejected with an error."""
        resource = {}
        with open("tests/data/valid.ods", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.xml_format import is_valid
class TestXmlFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the XML format checker."""

    def _validate(self, fixture_path):
        """Run ``is_valid`` on a fixture and return ``(verdict, resource)``."""
        metadata = {}
        with open(fixture_path, "r") as handle:
            verdict = is_valid(metadata, handle)
        return verdict, metadata

    def test_is_valid__valid(self):
        """The ``correct.xml`` fixture is accepted and no error is recorded."""
        verdict, metadata = self._validate("tests/data/correct.xml")
        self.assertTrue(verdict)
        self.assertIsNone(metadata.get("error"))

    def test_is_valid__invalid(self):
        """The ``incorrect.xml`` fixture is rejected with an error."""
        verdict, metadata = self._validate("tests/data/incorrect.xml")
        self.assertFalse(verdict)
        self.assertIsNotNone(metadata.get("error"))
if __name__ == "__main__":
unittest.main()
import unittest
from formats.zip_format import is_valid
class TestZipFormat(unittest.TestCase):
    """Tests for ``is_valid`` of the ZIP format checker.

    Fixtures are opened in binary mode so that the file object never tries
    to decode archive bytes as text.
    """

    def test_is_valid__valid(self):
        """The ZIP fixture is accepted and no error is recorded."""
        resource = {}
        with open("tests/data/bermuda.zip", "rb") as file:
            self.assertTrue(is_valid(resource, file))
            self.assertIsNone(resource.get("error"))

    def test_is_valid__invalid(self):
        """An XML fixture is rejected with the error "Not a ZIP file."."""
        resource = {}
        with open("tests/data/correct.xml", "rb") as file:
            self.assertFalse(is_valid(resource, file))
            self.assertIsNotNone(resource.get("error"))
            self.assertEqual("Not a ZIP file.", resource["error"])
if __name__ == "__main__":
unittest.main()