#!/usr/bin/env python3
"""Render the results of the DCAT catalog check as an HTML report on stdout.

The input is a JSON Lines file (one JSON object per line, see
``logfile_path``). For every check the report shows a donut chart with the
number of passed / failed / unchecked entries, a table of the publishers with
failures, and finally a filterable table of all entries that failed at least
one check.
"""
import html
import json
from collections import Counter

logfile_path = "result.jsonl"

# (check name, diagram id, diagram title) in the order the diagrams are printed.
_DIAGRAMS = (
    ("accessible", "1", "Availability"),
    ("valid", "2", "File content"),
    ("mimetype", "3", "MIME type"),
    ("checksum", "4", "Checksum"),
    ("schema", "5", "Frictionless Schema"),
)

# (entry key, default for a missing key, column heading, filter placeholder)
# for the table of problematic distributions, in column order.
_COLUMNS = (
    ("publisher", None, "Publisher", "Filter by publisher"),
    ("format", None, "Format", "Filter by format"),
    ("accessible", "", "accessible", "Filter by accessibility"),
    ("http_status", "", "HTTP status", "Filter by HTTP status"),
    ("valid", "", "content correct", "Filter by correct content"),
    ("mimetype_mismatch", "", "MIME type wrong", "Filter by MIME type error"),
    ("mimetype", "", "MIME type", "Filter by MIME type"),
    ("checksum_ok", "", "checksum correct", "Filter by checksum"),
    ("schema_valid", "", "schema valid", "Filter by schema valid"),
    ("url", None, "URL", "Filter by url"),
    ("error", None, "Error message", "Filter by error message"),
)


def _esc(value):
    """Return *value* converted to text and escaped for embedding in HTML."""
    return html.escape(str(value))


def write_diagram(id, title, counter, counter_publisher):
    """Print one report section: a donut chart plus the affected publishers.

    Args:
        id: Suffix for the chart container's element id (``vis<id>``). It is
            inserted verbatim into HTML and JavaScript, so it must be a plain
            identifier chosen by the caller, not untrusted data.
        title: Section heading; HTML-escaped before output.
        counter: ``Counter`` of check outcomes. The counts stored under
            ``True``, ``False`` and ``None`` become the chart series, in that
            order, matching the three chart labels.
        counter_publisher: Mapping of publisher to number of failed entries;
            one table row is printed per key, in iteration order.
    """
    print("<div>")
    print(f"<h2>{_esc(title)}</h2>")
    print(f"<div id='vis{id}' style='max-width: 400px;'></div>")
    print('<script type="text/javascript">')
    print(f"new ApexCharts(document.querySelector('#vis{id}'),")
    print("{ chart: { type: 'donut' },")
    print(f"series: [{counter[True]}, {counter[False]}, {counter[None]}],")
    print("labels: ['korrekt', 'fehlerhaft', 'nicht geprüft'],")
    print('colors: ["#1eae9c", "#d4004b", "#a4adb6"]')
    print("}).render();")
    print("</script>")
    print("<h3>Publishers affected</h3>")
    print("<table>")
    for publisher, failures in counter_publisher.items():
        # Publisher names come from the log file, so they must be escaped.
        print(f"<tr><td>{_esc(publisher)}</td><td>{failures}</td></tr>")
    print("</table>")
    print("<hr/>")
    print("</div>")


def _check_results(entry):
    """Return the outcome of every check for one log entry, keyed by check name.

    An outcome is ``True`` (passed), ``False`` (failed) or ``None`` (the entry
    has no result for that check). The MIME type check is derived from the
    ``mimetype_mismatch`` flag; a missing flag counts as passed.
    """
    return {
        "accessible": entry.get("accessible"),
        "valid": entry.get("valid"),
        "mimetype": not entry.get("mimetype_mismatch", False),
        "checksum": entry.get("checksum_ok"),
        "schema": entry.get("schema_valid"),
    }


def _collect_statistics(path):
    """Read the JSON Lines log at *path* and aggregate the check outcomes.

    Returns:
        A tuple ``(totals, failures_by_publisher, problems)`` where ``totals``
        and ``failures_by_publisher`` map each check name to a ``Counter``
        (of outcomes and of publishers with a failed entry, respectively) and
        ``problems`` maps the ``id`` of every entry that failed at least one
        check to that entry.
    """
    totals = {check: Counter() for check, _, _ in _DIAGRAMS}
    failures_by_publisher = {check: Counter() for check, _, _ in _DIAGRAMS}
    problems = {}

    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not JSON documents.
                continue
            entry = json.loads(line)
            publisher = entry.get("publisher")
            dist_id = entry.get("id")
            for check, outcome in _check_results(entry).items():
                totals[check][outcome] += 1
                # Only an explicit False is a failure; None means "not checked".
                if outcome is False:
                    failures_by_publisher[check][publisher] += 1
                    problems[dist_id] = entry

    return totals, failures_by_publisher, problems


def _print_page_header():
    """Print the document head and the opening of the body."""
    print("<!doctype html>")
    print("<html>")
    print(" <head>")
    print(" <title>DCAT Catalog Check</title>")
    print(' <script src="https://cdn.jsdelivr.net/npm/apexcharts"></script>')
    print(
        ' <link rel="stylesheet" href="https://cdn.datatables.net/2.1.8/css/dataTables.dataTables.css" />'
    )
    print(' <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>')
    print(' <script src="https://cdn.datatables.net/2.1.8/js/dataTables.js"></script>')
    print(" </head>")
    print(" <body style='background: #f2f4f7;'>")
    print(" <h1>Results of the DCAT Catalog Check</h1>")


def _print_problem_table(problems):
    """Print the filterable table of all entries that failed a check."""
    print("<div>")
    print("<h2>Distributionen with errors</h2>")
    print('<table class="table" id="distributions">')
    headings = "".join(f"<th>{heading}</th>" for _, _, heading, _ in _COLUMNS)
    print(f"<thead><tr>{headings}</tr>")
    # Second header row: one text input per column, wired up to the
    # DataTables column search by the script printed at the end of the page.
    print("<tr>")
    for _, _, _, placeholder in _COLUMNS:
        print(f'<th><input type="text" placeholder="{placeholder}" /></th>')
    print("</tr>")
    print("</thead>")
    print("<tbody>")
    for entry in problems.values():
        # All cell values originate from the log file and are escaped.
        cells = "".join(
            f"<td>{_esc(entry.get(key, default))}</td>"
            for key, default, _, _ in _COLUMNS
        )
        print(f"<tr>{cells}</tr>")
    print("</tbody></table>")
    print("</div>")


def main():
    """Read ``logfile_path`` and print the complete HTML report to stdout."""
    totals, failures_by_publisher, problems = _collect_statistics(logfile_path)

    _print_page_header()
    for check, diagram_id, title in _DIAGRAMS:
        write_diagram(diagram_id, title, totals[check], failures_by_publisher[check])
    _print_problem_table(problems)

    print(
        """
<script>
$(document).ready(function() {
    var table = $('#distributions').DataTable();
    table.columns().every(function() {
        var that = this;
        $('input', this.header()).on('keyup change', function() {
            if (that.search() !== this.value) {
                that
                    .search(this.value)
                    .draw();
            }
        });
    });
});
</script>
"""
    )
    print("</body></html>")


if __name__ == "__main__":
    main()