"""Validate the bathing-water master data and plot the sites on a map.

The script validates the local CSV against the frictionless resource
descriptor, then plots the UTM coordinates of every bathing water on top of
the Schleswig-Holstein district map and saves the figure to ``output.png``.
"""

import matplotlib.pyplot as plt
import geopandas as gpd
from frictionless import validate, steps, Resource, transform

DESCRIPTOR = "badegewasser-stammdaten-aktuell.json"
PRIMARY_KEY = "BADEGEWAESSERID"


def load_columns(resource, column_names):
    """Return *resource* reduced to the given columns, without None rows.

    A row is kept only if *every* requested column is not None, so the
    returned columns stay aligned row by row.

    Args:
        resource: frictionless resource to transform.
        column_names: names of the columns to keep.

    Returns:
        The transformed resource containing the primary key column plus
        the requested columns.
    """
    not_none = " and ".join(f"{name} != None" for name in column_names)
    return transform(
        resource,
        steps=[
            steps.table_normalize(),
            # BADEGEWAESSERID is needed because it is the primary key
            steps.field_filter(names=[PRIMARY_KEY, *column_names]),
            steps.row_filter(formula=not_none),
        ],
    )


def load_column(resource, column_name):
    """Return one column of *resource* with all None values removed.

    Args:
        resource: frictionless resource to transform.
        column_name: name of the column to keep.

    Returns:
        The transformed resource containing the primary key column and
        ``column_name``, restricted to rows where ``column_name`` is set.
    """
    return load_columns(resource, [column_name])


# Validate the data with the given resource json.
# It is also possible to get the newest data by using the url given in the
# resource json by removing the path argument.
resource = Resource(DESCRIPTOR, path="v_badegewaesser_odata.csv")
report = validate(resource)

if report.valid:
    print("CSV file validated using frictionless")

    # Load both coordinate columns in a single pass. Filtering them
    # separately would drop different rows whenever only one coordinate is
    # missing, leaving the x and y values unequal in length or misaligned.
    # NOTE(review): as in the original, this load omits ``path`` and so reads
    # from whatever the descriptor points at, not necessarily the local CSV
    # validated above -- confirm that this is intended.
    resource = Resource(DESCRIPTOR)
    coords = load_columns(resource, ["UTM_OST", "UTM_NORD"]).to_pandas()

    # Load the Schleswig-Holstein map
    sh_df = gpd.read_file("./kreise.min.geojson")

    # Plot the map
    axes = sh_df.plot()

    # Plot the coordinates of the bathing waters onto the SH map. Selecting
    # the columns by name keeps the primary key out of the plotted values.
    axes.scatter(coords["UTM_OST"], coords["UTM_NORD"], c='orange', s=0.5)

    # Turn x and y axis off and save figure
    plt.axis('off')
    plt.savefig('output.png', dpi=200)
else:
    print("The CSV file is not valid:")
    print(report.flatten(["rowNumber", "fieldNumber", "type"]))