#!/usr/bin/env python3
import html
import json
from collections import Counter
# Path of the check log read by this script; it is parsed line by line,
# each line being decoded as one JSON document.
logfile_path = "result.jsonl"
def write_diagram(id: str, title: str, counter: Counter, counter_publisher: Counter) -> None:
    """Print one report section to stdout: a donut chart and a publisher table.

    The section consists of a heading, an ApexCharts donut chart and a table
    listing how often each publisher was affected.

    Args:
        id: Suffix used for the chart container's element id (``vis<id>``);
            it is written verbatim into both the HTML attribute and the
            JavaScript selector. The name shadows the builtin ``id`` but is
            kept so existing callers keep working.
        title: Heading of the section (HTML-escaped on output).
        counter: Tally of check results. The values stored under ``True``,
            ``False`` and ``None`` are charted as 'korrekt', 'fehlerhaft'
            and 'nicht geprüft' respectively.
        counter_publisher: Maps a publisher to its number of failed checks;
            one table row is printed per entry.
    """
    print("<div>")
    print(f"<h2>{html.escape(str(title))}</h2>")
    print(f"<div id='vis{id}' style='max-width: 400px;'></div>")

    # Inline script that renders the donut chart into the container above.
    print('<script type="text/javascript">')
    print(f"new ApexCharts(document.querySelector('#vis{id}'),")
    print("{ chart: { type: 'donut' },")
    print(f"series: [{counter[True]}, {counter[False]}, {counter[None]}],")
    print("labels: ['korrekt', 'fehlerhaft', 'nicht geprüft'],")
    print('colors: ["#1eae9c", "#d4004b", "#a4adb6"]')
    print("}).render();")
    print("</script>")

    print("<h3>Publishers affected</h3>")
    print("<table>")
    for publisher, count in counter_publisher.items():
        # Publisher names originate from the log file, so escape them before
        # embedding them in markup.
        print(f"<tr><td>{html.escape(str(publisher))}</td><td>{count}</td></tr>")
    print("</table>")
    print("<hr/>")
    print("</div>")
# Overall tallies per check, keyed by the check result (True / False / None).
counter_valid = Counter()
counter_404 = Counter()  # not updated anywhere in this section
counter_accessible = Counter()
counter_mimetype_mismatch = Counter()
counter_checksum_ok = Counter()
counter_schema_valid = Counter()

# Number of failed checks per publisher, one counter per kind of check.
counter_publisher_accessible = Counter()
counter_publisher_checksum = Counter()
counter_publisher_valid = Counter()
counter_publisher_schema_valid = Counter()
counter_publisher_mimetype_mismatch = Counter()

# Entries that failed at least one check, keyed by their "id" value.
# A later entry with the same id replaces the earlier one.
distributions_with_problems = {}

# JSON text is UTF-8, so do not depend on the platform's default encoding.
with open(logfile_path, "r", encoding="utf-8") as file:
    for line in file:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline) is not a JSON document.
            continue
        entry = json.loads(line)
        publisher = entry.get("publisher")
        id = entry.get("id")

        # For the following four checks a missing key is tallied under None.
        valid = entry.get("valid", None)
        counter_valid[valid] += 1
        if valid is False:
            counter_publisher_valid[publisher] += 1
            distributions_with_problems[id] = entry

        accessible = entry.get("accessible", None)
        counter_accessible[accessible] += 1
        if accessible is False:
            counter_publisher_accessible[publisher] += 1
            distributions_with_problems[id] = entry

        checksum_ok = entry.get("checksum_ok", None)
        counter_checksum_ok[checksum_ok] += 1
        if checksum_ok is False:
            counter_publisher_checksum[publisher] += 1
            distributions_with_problems[id] = entry

        schema_valid = entry.get("schema_valid", None)
        counter_schema_valid[schema_valid] += 1
        if schema_valid is False:
            counter_publisher_schema_valid[publisher] += 1
            distributions_with_problems[id] = entry

        # The log stores a "mismatch" flag; invert it so that True means
        # "correct", like the other counters. A missing flag counts as correct.
        mimetype_correct = not entry.get("mimetype_mismatch", False)
        counter_mimetype_mismatch[mimetype_correct] += 1
        if mimetype_correct is False:
            counter_publisher_mimetype_mismatch[publisher] += 1
            distributions_with_problems[id] = entry
# --- Emit the HTML report on stdout ----------------------------------------

# Document head: chart library, DataTables stylesheet/script and jQuery.
print("<!doctype html>")
print("<html>")
print(" <head>")
print(" <title>DCAT Catalog Check</title>")
print(' <script src="https://cdn.jsdelivr.net/npm/apexcharts"></script>')
print(
    ' <link rel="stylesheet" href="https://cdn.datatables.net/2.1.8/css/dataTables.dataTables.css" />'
)
print(' <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>')
print(' <script src="https://cdn.datatables.net/2.1.8/js/dataTables.js"></script>')
print(" </head>")
print(" <body style='background: #f2f4f7;'>")
print(" <h1>Results of the DCAT Catalog Check</h1>")

# One chart section per check.
write_diagram("1", "Availability", counter_accessible, counter_publisher_accessible)
write_diagram("2", "File content", counter_valid, counter_publisher_valid)
write_diagram(
    "3",
    "MIME type",
    counter_mimetype_mismatch,
    counter_publisher_mimetype_mismatch,
)
write_diagram("4", "Checksum", counter_checksum_ok, counter_publisher_checksum)
write_diagram(
    "5", "Frictionless Schema", counter_schema_valid, counter_publisher_schema_valid
)

# Table of all entries that failed at least one check.
print("<div>")
print("<h2>Distributionen with errors</h2>")
print('<table class="table" id="distributions">')
print(
    "<thead><tr><th>Publisher</th><th>Format</th><th>accessible</th><th>HTTP status</th><th>content correct</th><th>MIME type wrong</th><th>MIME type</th><th>checksum correct</th><th>schema valid</th><th>URL</th><th>Error message</th></tr>"
)
# Second header row: one text filter per column, wired up by the script below.
print("<tr>")
print('<th><input type="text" placeholder="Filter by publisher" /></th>')
print('<th><input type="text" placeholder="Filter by format" /></th>')
print('<th><input type="text" placeholder="Filter by accessibility" /></th>')
print('<th><input type="text" placeholder="Filter by HTTP status" /></th>')
print('<th><input type="text" placeholder="Filter by correct content" /></th>')
print('<th><input type="text" placeholder="Filter by MIME type error" /></th>')
print('<th><input type="text" placeholder="Filter by MIME type" /></th>')
print('<th><input type="text" placeholder="Filter by checksum" /></th>')
print('<th><input type="text" placeholder="Filter by schema valid" /></th>')
print('<th><input type="text" placeholder="Filter by url" /></th>')
print('<th><input type="text" placeholder="Filter by error message" /></th>')
print("</tr>")
print("</thead>")
print("<tbody>")
for entry in distributions_with_problems.values():
    # Cell order must match the column headers printed above.
    cells = (
        entry.get("publisher"),
        entry.get("format"),
        entry.get("accessible", ""),
        entry.get("http_status", ""),
        entry.get("valid", ""),
        entry.get("mimetype_mismatch", ""),
        entry.get("mimetype", ""),
        entry.get("checksum_ok", ""),
        entry.get("schema_valid", ""),
        entry.get("url"),
        entry.get("error"),
    )
    # The values come from the log file (URLs, error messages, ...), so they
    # are escaped before being embedded in markup.
    row = "".join(f"<td>{html.escape(str(cell))}</td>" for cell in cells)
    print(f"<tr>{row}</tr>")
print("</tbody></table>")
print("</div>")

# Client-side script: turn the table into a DataTable and re-run the column
# search whenever one of the header filter inputs changes.
print("""
<script>
$(document).ready(function() {
var table = $('#distributions').DataTable();
table.columns().every(function() {
var that = this;
$('input', this.header()).on('keyup change', function() {
if (that.search() !== this.value) {
that
.search(this.value)
.draw();
}
});
});
});
</script>
""")