"""Unit tests for the DcatCatalogCheck class."""
import os
import unittest
import json
from unittest.mock import patch, mock_open, MagicMock
from dcat_catalog_check import (
DcatCatalogCheck,
)
from rdflib import Graph
from rdflib.namespace import RDF, DCAT
class TestDcatCatalogCheck(unittest.TestCase):
    """Unit tests for ``DcatCatalogCheck``.

    Every test runs against a fresh checker instance whose ``logger``
    attribute is replaced by a ``MagicMock`` so log calls can be asserted.
    Tests that exercise ``check_resource`` read fixture files from
    ``tests/data`` relative to the current working directory.
    """

    def setUp(self):
        """Create the checker under test and swap its logger for a mock."""
        self.dcc = DcatCatalogCheck("http://test.invalid:8000/", "my_api_key")
        self.logger_patch = patch.object(self.dcc, "logger", MagicMock())
        self.mock_logger = self.logger_patch.start()

    def tearDown(self):
        """Remove files left in the working directory and undo the logger patch."""
        try:
            # "previous_results.json" is written by test_read_previous_results;
            # "my_api_key" is presumably created by the checker itself —
            # TODO confirm.
            for leftover in ("my_api_key", "previous_results.json"):
                if os.path.exists(leftover):
                    os.remove(leftover)
        finally:
            # Always undo the patch, even if removing a file failed.
            self.logger_patch.stop()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _check_file_as_resource(
        self, path, file_format, url="http://test.invalid/data", **extra_fields
    ):
        """Run ``check_resource`` on a resource whose download is faked.

        ``load_http_complete`` on the checker is replaced by a mock that
        returns a ``200 OK`` response whose content is the bytes of *path*.

        Args:
            path: Fixture file whose bytes are served as the response body.
            file_format: Value stored under the resource's ``"format"`` key.
            url: Value stored under the resource's ``"url"`` key.
            **extra_fields: Additional resource entries (e.g. checksum data).

        Returns:
            The resource dictionary after ``check_resource`` has processed it.
        """
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.reason = "OK"
        with open(path, "rb") as file:
            mock_response.content = file.read()
        self.dcc.load_http_complete = MagicMock(return_value=mock_response)

        resource = {"url": url, "format": file_format, **extra_fields}
        self.dcc.check_resource(resource)
        return resource

    def _only_dataset(self, graph):
        """Return the single ``dcat:Dataset`` subject of *graph*.

        Fails the test if the graph does not contain exactly one dataset, so
        a test cannot pass without ever reaching its assertion.
        """
        datasets = list(graph.subjects(predicate=RDF.type, object=DCAT.Dataset))
        self.assertEqual(len(datasets), 1)
        return datasets[0]

    # ------------------------------------------------------------------
    # Configuration loading
    # ------------------------------------------------------------------

    def test_is_mime_type_compatible(self):
        """A MIME type is compatible only when listed for the given format."""
        self.dcc.allowed_file_formats = {
            "JSON": ["application/json"],
            "XML": ["application/xml"],
        }
        self.assertTrue(
            self.dcc.is_mime_type_compatible("JSON", "application/json")
        )
        self.assertFalse(
            self.dcc.is_mime_type_compatible("JSON", "application/xml")
        )
        # NOTE(review): the assertion wrapper for this call was missing in the
        # reviewed copy of the file; assertFalse is assumed for a format that
        # has no entry in the table — confirm against upstream.
        self.assertFalse(
            self.dcc.is_mime_type_compatible("UnknownFormat", "application/json")
        )

    def test_read_allowed_file_formats(self):
        """The allowed formats are parsed from the JSON content of the file."""
        file_content = '{"JSON": ["application/json"], "XML": ["application/xml"]}'
        with patch("builtins.open", mock_open(read_data=file_content)):
            formats = self.dcc.read_allowed_file_formats()
            self.assertEqual(
                formats,
                {"JSON": ["application/json"], "XML": ["application/xml"]},
            )

    def test_load_uri_replacements(self):
        """URI replacements are parsed from the JSON content of the file."""
        file_content = '[{"regex": "old", "replaced_by": "new"}]'
        with patch("os.path.exists", return_value=True), patch(
            "builtins.open", mock_open(read_data=file_content)
        ):
            replacements = self.dcc.load_uri_replacements()
            self.assertEqual(
                replacements, [{"regex": "old", "replaced_by": "new"}]
            )

    # Simulate that the file does not exist
    @patch("os.path.exists", return_value=False)
    def test_load_uri_replacements_file_not_exist(self, mock_exists):
        """An empty list is returned when the replacements file is absent."""
        replacements = self.dcc.load_uri_replacements()
        self.assertEqual(replacements, [])

    @patch("dcat_catalog_check.requests.get")
    def test_load_http_complete(self, mock_get):
        """The response obtained from ``requests.get`` is handed back."""
        mock_response = MagicMock()
        mock_response.content = b"content"
        mock_get.return_value = mock_response

        response = self.dcc.load_http_complete("http://example.com")

        self.assertEqual(response.content, b"content")

    # ------------------------------------------------------------------
    # Publisher extraction
    # ------------------------------------------------------------------

    def test_get_publisher(self):
        """A blank-node publisher is reported by its ``foaf:name``."""
        g = Graph()
        g.parse(
            data=(
                "@prefix dcat: <http://www.w3.org/ns/dcat#> .\n"
                "@prefix dct: <http://purl.org/dc/terms/> .\n"
                "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n"
                "<http://example.org/DS> a dcat:Dataset; dct:publisher "
                '[ a foaf:Organization ; foaf:name "The publisher" ] .'
            ),
            format="ttl",
        )
        result = self.dcc._get_publisher(g, self._only_dataset(g))
        self.assertEqual("The publisher", result)

    def test_get_publisher_url(self):
        """A publisher given only as a URI is reported as that URI."""
        g = Graph()
        g.parse(
            data=(
                "@prefix dcat: <http://www.w3.org/ns/dcat#> .\n"
                "@prefix dct: <http://purl.org/dc/terms/> .\n"
                "<http://example.org/DS> a dcat:Dataset; "
                "dct:publisher <http://example.org/publisher> ."
            ),
            format="ttl",
        )
        result = self.dcc._get_publisher(g, self._only_dataset(g))
        self.assertEqual("http://example.org/publisher", result)

    # ------------------------------------------------------------------
    # check_resource: plain and compressed files
    # ------------------------------------------------------------------

    def test_check_resource__json_valid(self):
        """A correct JSON file is accessible, valid and typed as JSON."""
        resource = self._check_file_as_resource("tests/data/correct.json", "JSON")
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)
        self.assertEqual(resource["mimetype"], "application/json")

    def test_check_resource__json_gz_valid(self):
        """A gzip-compressed JSON file is valid and reports its compression."""
        resource = self._check_file_as_resource(
            "tests/data/correct.json.gz", "JSON"
        )
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)
        self.assertEqual(resource["mimetype"], "application/json")
        self.assertEqual(resource["compress_format"], "application/gzip")

    def test_check_resource__json_bz2_valid(self):
        """A bzip2-compressed JSON file is valid and reports its compression."""
        resource = self._check_file_as_resource(
            "tests/data/correct.json.bz2", "JSON"
        )
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)
        self.assertEqual(resource["mimetype"], "application/json")
        self.assertEqual(resource["compress_format"], "application/x-bzip2")

    def test_check_resource__json_xz_valid(self):
        """An xz-compressed JSON file is valid and reports its compression."""
        resource = self._check_file_as_resource(
            "tests/data/correct.json.xz", "JSON"
        )
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)
        self.assertEqual(resource["mimetype"], "application/json")
        self.assertEqual(resource["compress_format"], "application/x-xz")

    def test_check_resource__json_invalid(self):
        """A malformed JSON file is accessible but flagged as invalid."""
        resource = self._check_file_as_resource(
            "tests/data/incorrect.json", "JSON"
        )
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], False)
        self.assertEqual(resource["http_status"], 200)

    def test_check_resource__xml_valid(self):
        """A correct XML file is accessible and valid."""
        resource = self._check_file_as_resource("tests/data/correct.xml", "XML")
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)

    def test_check_resource__png_valid(self):
        """A PNG with a matching SHA-1 checksum is valid and checksum-ok."""
        resource = self._check_file_as_resource(
            "tests/data/image.png",
            "PNG",
            checksum_algorithm=(
                "http://spdx.org/rdf/terms#checksumAlgorithm_sha1"
            ),
            checksum_value="a8643241029f9779302874db5c18b0f0bacbdd25",
        )
        self.assertEqual(resource["accessible"], True)
        self.assertEqual(resource["valid"], True)
        self.assertEqual(resource["http_status"], 200)
        self.assertEqual(resource["mimetype"], "image/png")
        self.assertEqual(resource["checksum_ok"], True)

    def test_check_checksum(self):
        """The checksum check also works with the old DCAT-AP.de algorithm specifications"""
        resource = {
            "checksum_algorithm": "http://dcat-ap.de/def/hashAlgorithms/md/5",
            "checksum_value": "7e2fb748950d6d07ab3f75ac87f6f5da",
        }
        with open("tests/data/image.png", "rb") as file:
            self.dcc._check_checksum(resource, file)
        self.assertEqual(resource["checksum_ok"], True)

    # ------------------------------------------------------------------
    # check_resource: archives
    # ------------------------------------------------------------------

    def test_check_resource__one_json_in_zip_valid(self):
        """This ZIP file contains just one valid JSON file."""
        resource = self._check_file_as_resource(
            "tests/data/json-in-zip.zip", "JSON"
        )
        self.assertEqual(resource.get("accessible"), True)
        self.assertEqual(resource.get("valid"), True)
        self.assertEqual(resource.get("http_status"), 200)
        self.assertEqual(resource.get("mimetype"), "application/json")
        self.assertEqual(resource.get("package_format"), "application/zip")

    def test_check_resource__multiple_json_files_in_zip_valid(self):
        """This ZIP file contains several valid JSON files and one image."""
        resource = self._check_file_as_resource(
            "tests/data/jsons-in-zip.zip", "JSON"
        )
        self.assertEqual(resource.get("accessible"), True)
        self.assertEqual(resource.get("valid"), True)
        self.assertEqual(resource.get("http_status"), 200)
        self.assertEqual(resource.get("mimetype"), "application/json")
        self.assertEqual(resource.get("package_format"), "application/zip")

    def test_check_resource__no_json_in_zip_valid(self):
        """This ZIP file does not contain any JSON file only other files."""
        resource = self._check_file_as_resource(
            "tests/data/png-in-zip.zip", "JSON"
        )
        self.assertEqual(resource.get("accessible"), True)
        self.assertEqual(resource.get("http_status"), 200)
        self.assertEqual(resource.get("mimetype_mismatch"), True)
        self.assertEqual(resource.get("package_format"), "application/zip")

    def test_is_container(self):
        """TAR and ZIP count as containers for PNG; ZIP does not for SHP, GTFS or ZIP."""
        self.dcc.read_allowed_file_formats()
        self.assertFalse(self.dcc._is_container("image/png", "PNG"))
        self.assertTrue(self.dcc._is_container("application/x-tar", "PNG"))
        self.assertTrue(self.dcc._is_container("application/zip", "PNG"))
        self.assertFalse(self.dcc._is_container("application/zip", "SHP"))
        self.assertFalse(self.dcc._is_container("application/zip", "GTFS"))
        self.assertFalse(self.dcc._is_container("application/zip", "ZIP"))

    def test_check_resource__shp_with_multiple_layers(self):
        """This shape file contains multiple layers."""
        resource = self._check_file_as_resource(
            "tests/data/zos116.zip",
            "SHP",
            url="http://test.invalid/zos116.zip",
        )
        self.assertIsNone(resource.get("error"))
        self.assertEqual(resource.get("accessible"), True)
        self.assertEqual(resource.get("http_status"), 200)
        self.assertEqual(resource.get("valid"), True)

    # ------------------------------------------------------------------
    # Previous results
    # ------------------------------------------------------------------

    def test_read_previous_results(self):
        """Entries of a JSON-lines file are loaded and keyed by their URL."""
        # Test data to simulate the contents of previous_results.json
        test_data = [
            {"url": "http://example.com", "status": "valid", "format": "JSON"},
            {"url": "http://example.org", "status": "invalid", "format": "XML"},
        ]
        # One JSON document per line; the file is removed again in tearDown.
        with open("previous_results.json", "w", encoding="utf-8") as f:
            for entry in test_data:
                f.write(json.dumps(entry) + "\n")

        self.dcc.read_previous_results("previous_results.json")

        previous = self.dcc.previous_results
        self.assertEqual(len(previous), 2)  # Expect 2 entries
        self.assertIn("http://example.com", previous)
        self.assertIn("http://example.org", previous)
        self.assertEqual(previous["http://example.com"]["status"], "valid")
        self.assertEqual(previous["http://example.org"]["status"], "invalid")

    @patch("os.path.exists", return_value=False)
    def test_read_previous_results_file_not_exist(self, mock_exists):
        """Test when the file does not exist."""
        self.dcc.read_previous_results("non_existent_file.json")
        # Check that the warning log was triggered
        self.mock_logger.warning.assert_called_with(
            "File 'non_existent_file.json' does not exist. No previous results loaded."
        )

    @patch("builtins.open", mock_open(read_data="invalid_json"))
    @patch("os.path.exists", return_value=True)
    def test_read_previous_results_invalid_json(self, mock_exists):
        """Test when the file contains invalid JSON."""
        self.dcc.read_previous_results("invalid_json_file.json")
        # Check if the error log was triggered for invalid JSON
        self.mock_logger.error.assert_called_with(
            "Invalid JSON at line 1: Expecting value: line 1 column 1 (char 0)"
        )

    @patch(
        "builtins.open",
        mock_open(
            read_data=(
                '{"status": "valid", "format": "JSON"}\n'
                '{"url": "http://example.com", "status": "valid", "format": "JSON"}'
            )
        ),
    )
    @patch("os.path.exists", return_value=True)
    def test_read_previous_results_missing_url(self, mock_exists):
        """Test when the file has a line with missing 'url'."""
        self.dcc.read_previous_results("missing_url_file.json")
        # Check if the warning log was triggered for the missing 'url'
        self.mock_logger.warning.assert_called_with(
            'Line 1 is missing \'url\': {"status": "valid", "format": "JSON"}'
        )

    # ------------------------------------------------------------------
    # URI replacements and result clearing
    # ------------------------------------------------------------------

    def test_apply_uri_replacements(self):
        """Configured regex replacements are applied to a URL; none leaves it as is."""
        self.dcc.uri_replacements = [
            {"regex": r"example\.com", "replaced_by": "test.com"},
            {"regex": r"http://", "replaced_by": "https://"},
        ]
        cases = [
            # URL matching both replacements
            ("http://example.com/path", "https://test.com/path"),
            # URL matching only one replacement
            ("http://other.com/path", "https://other.com/path"),
            # URL with no matches
            ("https://unchanged.com/path", "https://unchanged.com/path"),
            # Empty URL
            ("", ""),
        ]
        for url, expected in cases:
            with self.subTest(url=url):
                self.assertEqual(self.dcc.apply_uri_replacements(url), expected)

        # No URI replacements defined
        self.dcc.uri_replacements = []
        result = self.dcc.apply_uri_replacements("http://example.com/path")
        self.assertEqual(result, "http://example.com/path")

    def test_clear_result(self):
        """``_clear_result`` drops the check-result keys and keeps the rest."""
        cleared_keys = [
            "accessible",
            "checksum_ok",
            "duration",
            "error",
            "etag",
            "http_status",
            "last_check",
            "mimetype",
            "mimetype_mismatch",
            "valid",
        ]
        resource = {
            "accessible": True,
            "checksum_ok": True,
            "duration": 1.23,
            "error": "Some error",
            "etag": "some-etag",
            "http_status": 200,
            "last_check": "2024-12-27T12:34:56Z",
            "mimetype": "application/json",
            "mimetype_mismatch": False,
            "valid": True,
            "url": "http://example.com/data",  # This key should remain untouched
            "format": "JSON",  # This key should remain untouched
        }

        self.dcc._clear_result(resource)

        # Check that all keys to clear have been removed
        for key in cleared_keys:
            self.assertNotIn(key, resource)
        # Check that unrelated keys remain
        self.assertIn("url", resource)
        self.assertIn("format", resource)
        self.assertEqual(resource["url"], "http://example.com/data")
        self.assertEqual(resource["format"], "JSON")