import os
import unittest
import json
from unittest.mock import patch, mock_open, MagicMock
from dcat_catalog_check import (
    DcatCatalogCheck,
)
from rdflib import Graph
from rdflib.namespace import RDF, DCAT


class TestDcatCatalogCheck(unittest.TestCase):
    def setUp(self):
        """Create a fresh checker and replace its logger with a mock.

        The patcher is stored on the test instance so it can be stopped
        again after the test has run.
        """
        checker = DcatCatalogCheck("http://test.invalid:8000/", "my_api_key")
        self.dcc = checker
        # Swap the logger for a MagicMock so tests can inspect log calls.
        self.logger_patch = patch.object(checker, "logger", MagicMock())
        self.mock_logger = self.logger_patch.start()

    def tearDown(self):
        """Remove files left in the working directory and stop the logger patch.

        The patch is stopped in a ``finally`` clause so that a failing file
        removal can never leave the logger patch active.
        """
        try:
            for leftover in ("my_api_key", "previous_results.json"):
                if os.path.exists(leftover):
                    os.remove(leftover)
        finally:
            self.logger_patch.stop()

    def test_is_mime_type_compatible(self):
        """A MIME type is compatible only with a format that lists it."""
        self.dcc.allowed_file_formats = dict(
            JSON=["application/json"],
            XML=["application/xml"],
        )
        is_compatible = self.dcc.is_mime_type_compatible

        # Format and MIME type match.
        self.assertTrue(is_compatible("JSON", "application/json"))
        # Known format, but the MIME type belongs to a different format.
        self.assertFalse(is_compatible("JSON", "application/xml"))
        # Format that is not part of the configured table.
        self.assertFalse(is_compatible("UnknownFormat", "application/json"))

    def test_read_allowed_file_formats(self):
        """The returned mapping equals the JSON content of the (mocked) file."""
        file_content = '{"JSON": ["application/json"], "XML": ["application/xml"]}'
        expected = {"JSON": ["application/json"], "XML": ["application/xml"]}

        # Any open() call made by the checker yields the canned JSON text.
        with patch("builtins.open", mock_open(read_data=file_content)):
            formats = self.dcc.read_allowed_file_formats()
            self.assertEqual(formats, expected)

    def test_load_uri_replacements(self):
        """Replacement rules are returned as found in the (mocked) file."""
        file_content = '[{"regex": "old", "replaced_by": "new"}]'
        expected = [{"regex": "old", "replaced_by": "new"}]

        pretend_file_exists = patch("os.path.exists", return_value=True)
        fake_open = patch("builtins.open", mock_open(read_data=file_content))
        with pretend_file_exists, fake_open:
            replacements = self.dcc.load_uri_replacements()
            self.assertEqual(replacements, expected)

    # Simulate that the file does not exist

    @patch("os.path.exists", return_value=False)
    def test_load_uri_replacements_file_not_exist(self, mock_exists):
        """With os.path.exists reporting False, an empty list is returned."""
        self.assertEqual(self.dcc.load_uri_replacements(), [])

    @patch("dcat_catalog_check.requests.get")
    def test_load_http_complete(self, mock_get):
        """The response returned by load_http_complete carries the mocked body."""
        # requests.get is patched, so no real HTTP request is made.
        mock_get.return_value = MagicMock(content=b"content")

        fetched = self.dcc.load_http_complete("http://example.com")

        self.assertEqual(fetched.content, b"content")

    def test_get_publisher(self):
        """A publisher given as a blank node is reported by its foaf:name."""
        g = Graph()
        g.parse(
            data=(
                "@prefix dcat: <http://www.w3.org/ns/dcat#> .\n"
                "@prefix dct: <http://purl.org/dc/terms/> .\n"
                "@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n"
                "<http://example.org/DS> a dcat:Dataset; "
                'dct:publisher [ a foaf:Organization ; foaf:name "The publisher" ] .'
            ),
            format="ttl",
        )

        datasets = list(g.subjects(predicate=RDF.type, object=DCAT.Dataset))
        # Guard against a vacuous pass: if no dataset were found, a plain
        # loop over the subjects would never execute any assertion.
        self.assertEqual(len(datasets), 1)

        result = self.dcc._get_publisher(g, datasets[0])
        self.assertEqual("The publisher", result)

    def test_get_publisher_url(self):
        """A publisher given only as a URI is reported as that URI."""
        g = Graph()
        g.parse(
            data=(
                "@prefix dcat: <http://www.w3.org/ns/dcat#> .\n"
                "@prefix dct: <http://purl.org/dc/terms/> .\n"
                "<http://example.org/DS> a dcat:Dataset; "
                "dct:publisher <http://example.org/publisher> ."
            ),
            format="ttl",
        )

        datasets = list(g.subjects(predicate=RDF.type, object=DCAT.Dataset))
        # Guard against a vacuous pass: if no dataset were found, a plain
        # loop over the subjects would never execute any assertion.
        self.assertEqual(len(datasets), 1)

        result = self.dcc._get_publisher(g, datasets[0])
        self.assertEqual("http://example.org/publisher", result)

    def test_check_resource__json_valid(self):
        """The correct.json fixture is reported as accessible, valid JSON."""
        with open("tests/data/correct.json", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
        }
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__json_gz_valid(self):
        """The correct.json.gz fixture is valid JSON with gzip compression."""
        with open("tests/data/correct.json.gz", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
            "compress_format": "application/gzip",
        }
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__json_bz2_valid(self):
        """The correct.json.bz2 fixture is valid JSON with bzip2 compression."""
        with open("tests/data/correct.json.bz2", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
            "compress_format": "application/x-bzip2",
        }
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__json_xz_valid(self):
        """The correct.json.xz fixture is valid JSON with xz compression."""
        with open("tests/data/correct.json.xz", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
            "compress_format": "application/x-xz",
        }
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__json_invalid(self):
        """The incorrect.json fixture is accessible but reported as not valid."""
        with open("tests/data/incorrect.json", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {"accessible": True, "valid": False, "http_status": 200}
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__xml_valid(self):
        """The correct.xml fixture is reported as accessible and valid."""
        with open("tests/data/correct.xml", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "XML"}
        self.dcc.check_resource(resource)

        expected = {"accessible": True, "valid": True, "http_status": 200}
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_resource__png_valid(self):
        """The image.png fixture is valid and matches its declared SHA-1 checksum."""
        with open("tests/data/image.png", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {
            "url": "http://test.invalid/data",
            "format": "PNG",
            "checksum_algorithm": "http://spdx.org/rdf/terms#checksumAlgorithm_sha1",
            "checksum_value": "a8643241029f9779302874db5c18b0f0bacbdd25",
        }
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "image/png",
            "checksum_ok": True,
        }
        for field, value in expected.items():
            self.assertEqual(resource[field], value)

    def test_check_checksum(self):
        """The checksum check also works with the old DCAT-AP.de algorithm specifications."""
        resource = {
            "checksum_algorithm": "http://dcat-ap.de/def/hashAlgorithms/md/5",
            "checksum_value": "7e2fb748950d6d07ab3f75ac87f6f5da",
        }
        with open("tests/data/image.png", "rb") as image:
            self.dcc._check_checksum(resource, image)
        self.assertEqual(resource["checksum_ok"], True)

    def test_check_resource__one_json_in_zip_valid(self):
        """A ZIP archive holding exactly one valid JSON file counts as valid JSON."""
        with open("tests/data/json-in-zip.zip", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
            "package_format": "application/zip",
        }
        # .get() keeps a missing key an assertion failure, not a KeyError.
        for field, value in expected.items():
            self.assertEqual(resource.get(field), value)

    def test_check_resource__multiple_json_files_in_zip_valid(self):
        """A ZIP archive with several valid JSON files and one image counts as valid JSON."""
        with open("tests/data/jsons-in-zip.zip", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "valid": True,
            "http_status": 200,
            "mimetype": "application/json",
            "package_format": "application/zip",
        }
        # .get() keeps a missing key an assertion failure, not a KeyError.
        for field, value in expected.items():
            self.assertEqual(resource.get(field), value)

    def test_check_resource__no_json_in_zip_valid(self):
        """A ZIP archive without any JSON file is flagged as a MIME type mismatch."""
        with open("tests/data/png-in-zip.zip", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/data", "format": "JSON"}
        self.dcc.check_resource(resource)

        expected = {
            "accessible": True,
            "http_status": 200,
            "mimetype_mismatch": True,
            "package_format": "application/zip",
        }
        # .get() keeps a missing key an assertion failure, not a KeyError.
        for field, value in expected.items():
            self.assertEqual(resource.get(field), value)

    def test_is_container(self):
        """Check _is_container for several MIME type / declared format pairs."""
        # NOTE(review): the return value is discarded; presumably the call
        # refreshes the checker's format table as a side effect - confirm.
        self.dcc.read_allowed_file_formats()

        # (detected MIME type, declared format, expected verdict)
        cases = [
            ("image/png", "PNG", False),
            ("application/x-tar", "PNG", True),
            ("application/zip", "PNG", True),
            ("application/zip", "SHP", False),
            ("application/zip", "GTFS", False),
            ("application/zip", "ZIP", False),
        ]
        for mimetype, declared_format, expected in cases:
            verdict = self.dcc._is_container(mimetype, declared_format)
            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

    def test_check_resource__shp_with_multiple_layers(self):
        """A shape file with multiple layers is checked without an error."""
        with open("tests/data/zos116.zip", "rb") as fixture:
            payload = fixture.read()
        fake_response = MagicMock(status_code=200, reason="OK", content=payload)
        # Bypass the download: the checker receives the canned response.
        self.dcc.load_http_complete = MagicMock(return_value=fake_response)

        resource = {"url": "http://test.invalid/zos116.zip", "format": "SHP"}
        self.dcc.check_resource(resource)

        self.assertIsNone(resource.get("error"))
        expected = {"accessible": True, "http_status": 200, "valid": True}
        for field, value in expected.items():
            self.assertEqual(resource.get(field), value)

    def test_read_previous_results(self):
        """Entries of a JSON-lines results file are loaded, keyed by their URL.

        The fixture is written into a temporary directory so the test never
        overwrites a ``previous_results.json`` that already exists in the
        working directory.
        """
        import tempfile  # local import: only needed by this test

        # Test data to simulate the contents of a previous results file
        test_data = [
            {"url": "http://example.com", "status": "valid", "format": "JSON"},
            {"url": "http://example.org", "status": "invalid", "format": "XML"},
        ]

        with tempfile.TemporaryDirectory() as tmp_dir:
            results_path = os.path.join(tmp_dir, "previous_results.json")
            # One JSON document per line
            with open(results_path, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(entry) + "\n" for entry in test_data)

            # Call the method to test while the file still exists
            self.dcc.read_previous_results(results_path)

        # Assertions: check that the data was loaded into previous_results
        previous = self.dcc.previous_results
        self.assertEqual(len(previous), 2)  # Expect 2 entries
        self.assertIn("http://example.com", previous)
        self.assertIn("http://example.org", previous)
        self.assertEqual(previous["http://example.com"]["status"], "valid")
        self.assertEqual(previous["http://example.org"]["status"], "invalid")

    @patch("os.path.exists", return_value=False)
    def test_read_previous_results_file_not_exist(self, mock_exists):
        """A warning is logged when the results file does not exist."""
        missing_path = "non_existent_file.json"
        expected_message = (
            "File 'non_existent_file.json' does not exist. No previous results loaded."
        )

        self.dcc.read_previous_results(missing_path)

        # The most recent warning must be the "file does not exist" message.
        self.mock_logger.warning.assert_called_with(expected_message)

    @patch("builtins.open", mock_open(read_data="invalid_json"))
    @patch("os.path.exists", return_value=True)
    def test_read_previous_results_invalid_json(self, mock_exists):
        """An error is logged when a line of the results file is not valid JSON."""
        expected_message = (
            "Invalid JSON at line 1: Expecting value: line 1 column 1 (char 0)"
        )

        self.dcc.read_previous_results("invalid_json_file.json")

        # The most recent error must report the JSON decoding problem.
        self.mock_logger.error.assert_called_with(expected_message)

    @patch(
        "builtins.open",
        mock_open(
            read_data='{"status": "valid", "format": "JSON"}\n{"url": "http://example.com", "status": "valid", "format": "JSON"}'
        ),
    )
    @patch("os.path.exists", return_value=True)
    def test_read_previous_results_missing_url(self, mock_exists):
        """A warning is logged for a line that has no 'url' entry."""
        expected_message = (
            'Line 1 is missing \'url\': {"status": "valid", "format": "JSON"}'
        )

        self.dcc.read_previous_results("missing_url_file.json")

        # Only the first line of the mocked file lacks a 'url'.
        self.mock_logger.warning.assert_called_with(expected_message)

    def test_apply_uri_replacements(self):
        """Check apply_uri_replacements with two rules, then with none."""
        self.dcc.uri_replacements = [
            {"regex": r"example\.com", "replaced_by": "test.com"},
            {"regex": r"http://", "replaced_by": "https://"},
        ]

        # (input URL, expected result)
        cases = (
            # URL matching both replacements
            ("http://example.com/path", "https://test.com/path"),
            # URL matching only one replacement
            ("http://other.com/path", "https://other.com/path"),
            # URL with no matches
            ("https://unchanged.com/path", "https://unchanged.com/path"),
            # Empty URL
            ("", ""),
        )
        for url, expected in cases:
            self.assertEqual(self.dcc.apply_uri_replacements(url), expected)

        # Without any replacement rules the URL comes back unchanged.
        self.dcc.uri_replacements = []
        untouched = "http://example.com/path"
        self.assertEqual(self.dcc.apply_uri_replacements(untouched), untouched)

    def test_clear_result(self):
        """_clear_result removes the check-result keys and keeps the rest."""
        # Keys expected to be removed by _clear_result
        result_fields = {
            "accessible": True,
            "checksum_ok": True,
            "duration": 1.23,
            "error": "Some error",
            "etag": "some-etag",
            "http_status": 200,
            "last_check": "2024-12-27T12:34:56Z",
            "mimetype": "application/json",
            "mimetype_mismatch": False,
            "valid": True,
        }
        # Keys expected to remain untouched
        descriptive_fields = {
            "url": "http://example.com/data",
            "format": "JSON",
        }
        resource = {**result_fields, **descriptive_fields}

        self.dcc._clear_result(resource)

        for key in result_fields:
            self.assertNotIn(key, resource)

        for key in descriptive_fields:
            self.assertIn(key, resource)
        for key, value in descriptive_fields.items():
            self.assertEqual(resource[key], value)


# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()