Skip to content
Snippets Groups Projects
generate_report.py 5.85 KiB
Newer Older
  • Learn to ignore specific revisions
  • Jesper Zedlitz's avatar
    Jesper Zedlitz committed
    #!/usr/bin/env python3
    
    import html
    import json
    from collections import Counter
    
    logfile_path = "result.jsonl"
    
    
    def write_diagram(id, title, counter, counter_publisher):
        """Print one report section: a donut chart and a per-publisher table.

        The HTML fragment is written to standard output.

        Args:
            id: Suffix of the chart container's element id (``vis{id}``). It is
                interpolated verbatim into an HTML attribute and a JavaScript
                selector, so pass a simple token such as ``"1"``. The name
                shadows the builtin but is kept so existing callers still work.
            title: Section heading; HTML-escaped before it is printed.
            counter: Mapping indexed with ``True``, ``False`` and ``None``; the
                three values become the chart series labelled 'korrekt',
                'fehlerhaft' and 'nicht geprüft'. It must tolerate missing keys
                (a ``Counter`` returns 0 for them).
            counter_publisher: Mapping of publisher to a count, listed under
                "Publishers affected". Publisher names are HTML-escaped.
        """
        print("<div>")
        print(f"<h2>{html.escape(str(title))}</h2>")
        print(f"<div id='vis{id}' style='max-width: 400px;'></div>")
        print('<script type="text/javascript">')
        print(f"new ApexCharts(document.querySelector('#vis{id}'),")
        print("{ chart: { type: 'donut' },")
        print(f"series: [{counter[True]}, {counter[False]}, {counter[None]}],")
        print("labels: ['korrekt', 'fehlerhaft', 'nicht geprüft'],")
        print('colors: ["#1eae9c", "#d4004b", "#a4adb6"]')
        print("}).render();")
        print("</script>")

        print("<h3>Publishers affected</h3>")
        print("<table>")

        for publisher, count in counter_publisher.items():
            # Publisher names come from the log file; escape them so markup in
            # the data cannot break (or inject into) the report. str() keeps a
            # missing publisher rendering as "None", as before.
            print(f"<tr><td>{html.escape(str(publisher))}</td><td>{count}</td></tr>")

        print("</table>")
        print("<hr/>")
        print("</div>")
    
    
    # Overall tallies per check, keyed by the check's outcome as stored in the
    # log entry (True / False / None when the key is absent).
    counter_valid = Counter()
    counter_404 = Counter()
    counter_accessible = Counter()
    counter_mimetype_mismatch = Counter()
    counter_checksum_ok = Counter()
    counter_schema_valid = Counter()

    # Number of failed checks per publisher, one counter per check.
    counter_publisher_accessible = Counter()
    counter_publisher_checksum = Counter()
    counter_publisher_valid = Counter()
    counter_publisher_schema_valid = Counter()
    counter_publisher_mimetype_mismatch = Counter()

    # Entries that failed at least one check, keyed by the entry's "id".
    distributions_with_problems = {}

    # (entry key, overall counter, per-publisher failure counter) for the checks
    # that share the same tri-state handling.
    _checks = [
        ("valid", counter_valid, counter_publisher_valid),
        ("accessible", counter_accessible, counter_publisher_accessible),
        ("checksum_ok", counter_checksum_ok, counter_publisher_checksum),
        ("schema_valid", counter_schema_valid, counter_publisher_schema_valid),
    ]

    # Read the log as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(logfile_path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # crashing in json.loads.
                continue

            entry = json.loads(line)
            publisher = entry.get("publisher")
            distribution_id = entry.get("id")

            for key, totals, failures_by_publisher in _checks:
                outcome = entry.get(key)
                totals[outcome] += 1
                # Only an explicit False counts as a failure; a missing key
                # (None) is tallied as "not checked".
                if outcome is False:
                    failures_by_publisher[publisher] += 1
                    distributions_with_problems[distribution_id] = entry

            # The MIME check is stored inverted ("mismatch"), and a missing key
            # is treated as "no mismatch", so this tally never has a None bucket.
            mimetype_correct = not entry.get("mimetype_mismatch", False)
            counter_mimetype_mismatch[mimetype_correct] += 1
            if not mimetype_correct:
                counter_publisher_mimetype_mismatch[publisher] += 1
                distributions_with_problems[distribution_id] = entry
    
    # Document head: page title plus the CDN assets for the charts (ApexCharts)
    # and the filterable table (jQuery + DataTables).
    print("<!doctype html>")
    print("<html>")
    print("  <head>")
    # Declare the encoding: the report contains non-ASCII text (e.g. the chart
    # label 'nicht geprüft') that is garbled if the browser guesses wrong.
    print('    <meta charset="utf-8">')
    print("    <title>DCAT Catalog Check</title>")
    print('    <script src="https://cdn.jsdelivr.net/npm/apexcharts"></script>')

    print(
        '    <link rel="stylesheet" href="https://cdn.datatables.net/2.1.8/css/dataTables.dataTables.css" />'
    )
    print('    <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>')
    print('    <script src="https://cdn.datatables.net/2.1.8/js/dataTables.js"></script>')
    print("  </head>")
    print("  <body style='background: #f2f4f7;'>")
    print(" <h1>Results of the DCAT Catalog Check</h1>")


    # One section per check; the first argument becomes the chart element id.
    write_diagram("1", "Availability", counter_accessible, counter_publisher_accessible)
    write_diagram("2", "File content", counter_valid, counter_publisher_valid)
    write_diagram(
        "3",
        "MIME type",
        counter_mimetype_mismatch,
        counter_publisher_mimetype_mismatch,
    )
    write_diagram("4", "Checksum", counter_checksum_ok, counter_publisher_checksum)
    write_diagram(
        "5", "Frictionless Schema", counter_schema_valid, counter_publisher_schema_valid
    )
    
    # Table of every distribution that failed at least one check. The second
    # header row holds one text input per column, which the script printed at
    # the end of the page wires up as column filters.
    print("<div>")
    print("<h2>Distributionen with errors</h2>")

    print('<table class="table" id="distributions">')
    print(
        "<thead><tr><th>Publisher</th><th>Format</th><th>accessible</th><th>HTTP status</th><th>content correct</th><th>MIME type wrong</th><th>MIME type</th><th>checksum correct</th><th>schema valid</th><th>URL</th><th>Error message</th></tr>"
    )
    print("<tr>")
    print('<th><input type="text" placeholder="Filter by publisher" /></th>')
    print('<th><input type="text" placeholder="Filter by format" /></th>')

    print('<th><input type="text" placeholder="Filter by accessibility" /></th>')
    print('<th><input type="text" placeholder="Filter by HTTP status" /></th>')

    print('<th><input type="text" placeholder="Filter by correct content" /></th>')
    print('<th><input type="text" placeholder="Filter by MIME type error" /></th>')
    print('<th><input type="text" placeholder="Filter by MIME type" /></th>')
    print('<th><input type="text" placeholder="Filter by checksum" /></th>')
    print('<th><input type="text" placeholder="Filter by schema valid" /></th>')
    print('<th><input type="text" placeholder="Filter by url" /></th>')

    print('<th><input type="text" placeholder="Filter by error message" /></th>')

    print("</tr>")
    print("</thead>")
    print("<tbody>")

    # (entry key, fallback when the key is absent), in header column order.
    # A fallback of None renders as the text "None", as it did before.
    _columns = [
        ("publisher", None),
        ("format", None),
        ("accessible", ""),
        ("http_status", ""),
        ("valid", ""),
        ("mimetype_mismatch", ""),
        ("mimetype", ""),
        ("checksum_ok", ""),
        ("schema_valid", ""),
        ("url", None),
        ("error", None),
    ]

    for entry in distributions_with_problems.values():
        # Values come straight from the log (URLs, error messages, ...); escape
        # them so markup in the data cannot break or inject into the report.
        cells = "".join(
            f"<td>{html.escape(str(entry.get(key, fallback)))}</td>"
            for key, fallback in _columns
        )
        print(f"<tr>{cells}</tr>")

    print("</tbody></table>")

    print("</div>")
    
    
    # Client-side glue: turn the problem table into a DataTable and, for each
    # column, re-run the column search whenever an <input> found in that
    # column's header changes.
    print("""
    <script>
     $(document).ready(function() {
        var table = $('#distributions').DataTable();
    
        table.columns().every(function() {
          var that = this;
          
          $('input', this.header()).on('keyup change', function() {
            if (that.search() !== this.value) {
              that
                .search(this.value)
                .draw();
            }
          });
        });
      });
    
    </script>
    """)

    # Close the document opened at the top of the report.
    print("</body></html>")