Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import matplotlib.pyplot as plt
import geopandas as gpd
from frictionless import validate, steps, Resource, transform
def load_column(resource, column_name):
    """Return a transformed resource restricted to one column without None values.

    The transform pipeline normalizes the table, keeps only the
    ``BADEGEWAESSERID`` field and ``column_name``, and then keeps the rows
    for which the formula ``<column_name> != None`` holds.

    Args:
        resource: The frictionless resource handed to ``transform``.
        column_name: Name of the field to keep and to filter on.

    Returns:
        The result of ``transform`` for the pipeline described above.
    """
    not_none_formula = "{} != None".format(column_name)
    pipeline = [
        steps.table_normalize(),
        # BADEGEWAESSERID is kept because it is the primary key.
        steps.field_filter(names=["BADEGEWAESSERID", column_name]),
        steps.row_filter(formula=not_none_formula),
    ]
    return transform(resource, steps=pipeline)
# Validate the local CSV file against the resource descriptor (JSON).
# Leaving out the ``path`` argument makes the resource use the location given
# in the descriptor itself, which can be used to fetch the newest data.
descriptor = "badegewasser-stammdaten-aktuell.json"
report = validate(Resource(descriptor, path="v_badegewaesser_odata.csv"))

if not report.valid:
    # Print one (rowNumber, fieldNumber, type) entry per reported error.
    print("The CSV file is not valid:")
    print(report.flatten(["rowNumber", "fieldNumber", "type"]))
else:
    print("CSV file validated using frictionless")

    # NOTE(review): the resources below are created without the ``path``
    # override used for validation, so the plotted data may come from a
    # different source than the file validated above -- confirm intended.
    # NOTE(review): both columns are filtered for None independently; if a
    # row lacks only one of them the two frames could differ in length --
    # verify against the data.

    # East coordinate column as a pandas data frame.
    ost = load_column(Resource(descriptor), "UTM_OST").to_pandas()

    # North coordinate column as a pandas data frame.
    nord = load_column(Resource(descriptor), "UTM_NORD").to_pandas()

    # Load the Schleswig-Holstein map and draw it.
    sh_df = gpd.read_file("./kreise.min.geojson")
    axes = sh_df.plot()

    # Draw the loaded bathing-water coordinates on top of the map.
    axes.scatter(ost, nord, c="orange", s=0.5)

    # Hide both axes and write the figure to disk.
    plt.axis("off")
    plt.savefig("output.png", dpi=200)