import matplotlib.pyplot as plt
import geopandas as gpd
from frictionless import validate, steps, Resource, transform

def load_column(resource, column_name):
    """Return one column of *resource* with its ``None`` rows removed.

    The resource is run through a frictionless ``transform`` pipeline that
    normalizes the table, keeps only ``BADEGEWAESSERID`` and *column_name*,
    and drops every row whose *column_name* value is ``None``.

    Args:
        resource: The frictionless resource to read from.
        column_name: Name of the column to keep; it is interpolated verbatim
            into the row-filter formula.

    Returns:
        Whatever ``transform`` returns for the pipeline described above.
    """
    pipeline = [
        steps.table_normalize(),
        # BADEGEWAESSERID has to stay because it is the primary key
        steps.field_filter(names=["BADEGEWAESSERID", column_name]),
        steps.row_filter(formula=f"{column_name} != None"),
    ]
    return transform(resource, steps=pipeline)

# Resource descriptor (JSON) and the local CSV snapshot that is read with it.
# To work on the newest data instead, drop the ``path`` argument in
# ``_open_resource`` so that the url given in the resource json is used.
DESCRIPTOR = "badegewasser-stammdaten-aktuell.json"
CSV_PATH = "v_badegewaesser_odata.csv"


def _open_resource():
    """Return a fresh Resource for the descriptor that reads the local CSV."""
    return Resource(DESCRIPTOR, path=CSV_PATH)


# Validate the data with the given resource json
resource = _open_resource()
report = validate(resource)

if report.valid:
    print("CSV file validated using frictionless")

    # Load the east and north columns as pandas data frames. Every load goes
    # through _open_resource() so the plotted data is the data that was
    # validated above, not a different source named in the descriptor.
    ost = load_column(_open_resource(), "UTM_OST").to_pandas()
    nord = load_column(_open_resource(), "UTM_NORD").to_pandas()

    # Each column had its None rows removed independently, so the two frames
    # may cover different rows. Keep only rows present in both so that every
    # plotted point has an east AND a north value.
    # NOTE(review): assumes to_pandas() uses the primary key BADEGEWAESSERID
    # as the index of both frames -- confirm.
    coords = ost.join(nord, how="inner")

    # Load the Schleswig-Holstein map
    sh_df = gpd.read_file("./kreise.min.geojson")

    # Plot the map
    axes = sh_df.plot()

    # Plot the coordinates of the bathing waters onto the SH map
    # NOTE(review): presumably the GeoJSON uses the same UTM reference system
    # as UTM_OST / UTM_NORD; if not, the map needs reprojecting -- confirm.
    axes.scatter(coords["UTM_OST"], coords["UTM_NORD"], c='orange', s=0.5)

    # Turn x and y axis off and save figure
    plt.axis('off')
    plt.savefig('output.png', dpi=200)

else:
    print("The CSV file is not valid:")
    print(report.flatten(["rowNumber", "fieldNumber", "type"]))