Skip to content
Snippets Groups Projects
Commit 022d5c1a authored by root's avatar root
Browse files

Initial commit, first 3 examples

parents
Branches
No related tags found
No related merge requests found
*.mp4
*.jpg
*.png
denkmal_imgs/
import pandas as pd
from urllib.request import urlopen, ProxyHandler, build_opener, install_opener
import json

# Example: extract the 10 persons with the highest income for each year from
# a rather awkwardly designed JSON file.

# These are proxy settings. If you are behind a proxy just comment them in
# and swap <proxy-ip>:<proxy-port> with your proxy (maybe you need https
# instead of http in the address depending on your proxy settings).
#proxy_support = ProxyHandler({"http": "http://<proxy-ip>:<proxy-port>",
#                              "https": "http://<proxy-ip>:<proxy-port>"})
#opener = build_opener(proxy_support)
#install_opener(opener)


def _parse_bezuege(raw):
    """Convert a German-formatted money string ("vm_summe_bezuege") to float.

    An empty string counts as 0. The ".-" suffix (e.g. "1.000.-") is removed
    first, then thousands dots; the German decimal ',' becomes '.'; known
    junk ("Euro", "1:", spaces, a trailing parenthesized remark) is stripped.
    This sanitizing step changes depending on the data — check the errors of
    the float conversion to know how to further sanitize the strings.
    """
    if raw == "":
        return 0.0
    # ".-" must be removed BEFORE the thousands dots are stripped, otherwise
    # the pattern can never match and the leftover "-" breaks float().
    cleaned = raw.replace(".-", "")
    cleaned = cleaned.replace(".", "").replace(",", ".").replace(" ", "")
    cleaned = cleaned.lower().replace("euro", "").replace("1:", "")
    return float(cleaned.split("(")[0])


# The url to download the json
url = "https://phpefi.schleswig-holstein.de/vo/vo_opendata/export.json"
# store the response of the request and parse it as JSON
response = urlopen(url)
data_json = json.loads(response.read())

# Due to bad data quality and a non-ideal format, reshape the JSON so it is
# better convertible to a table: pandas.json_normalize needs JSON arrays,
# but the source nests everything in JSON objects keyed by id/year/person.
data_json = data_json["Unternehmen"]
clean_records = []
for unternehmen in data_json.values():
    list_offenlegungen = []
    # one level deeper: each year string maps to one "Offenlegung" object
    for offenlegung in unternehmen["Offenlegungen"].values():
        # convert the "Personen" object into a JSON array, normalizing the
        # "vm_summe_bezuege" field of every person along the way
        list_personen = []
        for person in offenlegung["Personen"].values():
            person["vm_summe_bezuege"] = _parse_bezuege(person["vm_summe_bezuege"])
            list_personen.append(person)
        offenlegung["Personen"] = list_personen
        list_offenlegungen.append(offenlegung)
    unternehmen["Offenlegungen"] = list_offenlegungen
    # Remove all "Unternehmen" that do not have any "Offenlegungen"
    if unternehmen["Offenlegungen"]:
        clean_records.append(unternehmen)

# Convert the JSON to a dataframe (a table):
# record_path is the path to the deepest level,
# meta adds fields picked up along the way down to the table.
df = pd.json_normalize(clean_records,
                       record_path=["Offenlegungen", "Personen"],
                       meta=["u_name", ["Offenlegungen", "v_jahr"]])
# These are all fields of "Personen" that should be dropped out of the final
# table. You can comment out the fields that you want to keep.
df = df.drop(columns=[
    #"vm_id",
    #"vm_veroeffentlichung_idx",
    "vm_veroeffentlichung_zulaessig",
    "vm_angaben_freiwillig",
    "vm_keine_angaben",
    #"vm_summe_bezuege",
    #"vm_summe_zusagen",
    "vm_anreize",
    #"vm_bezuege_dritte",
    "vm_reg_bezuege",
    "vm_reg_beendigung_rueckstellung",
    "vm_reg_beendigung_aenderung",
    "vm_reg_beendigung_voraussetzung",
    "vm_vorzeitig_bezuege",
    "vm_vorzeitig_voraussetzungen",
    "vm_verantwortlich",
    #"vm_angezeigte_funktion"
])
# Print for every year the top 10 with the highest "vm_summe_bezuege";
# in this dataset the years are stored as strings.
for jahr in ["2020", "2021", "2022", "2023", "2024"]:
    # a new line plus the year as a label for each table
    print("\n", jahr)
    # all rows whose "Offenlegungen.v_jahr" equals jahr
    df_jahr = df.loc[df["Offenlegungen.v_jahr"] == jahr]
    # sort descending by income; head(10) — unlike take(range(10)) — does
    # not raise when a year has fewer than 10 rows
    print(df_jahr.sort_values("vm_summe_bezuege", ascending=False).head(10))
import pandas as pd
import matplotlib.pyplot as plt

# This example creates a histogram of the hub height ("Nabenhoehe") of wind
# turbines from a CSV file.
###################################################################################################
# WITH PROXY
#from urllib.request import urlopen, ProxyHandler, build_opener, install_opener
#import io
# These are proxy settings. If you are behind a proxy just comment them in
# and swap the ip and port with your proxy.
#proxy_support = ProxyHandler({"http": "http://<proxy-ip>:<proxy-port>",
#                              "https": "http://<proxy-ip>:<proxy-port>"})  # maybe you need https instead of http in the address depending on your proxy settings
#opener = build_opener(proxy_support)
#install_opener(opener)
# The url to download the csv
#url = "https://opendata.schleswig-holstein.de/collection/windkraftanlagen/aktuell.csv"
# store the response of the request and unpack it
#response = urlopen(url)
#csv_byte = response.read()
# read the csv file: ';' is the separator of columns and ',' is the decimal
# indicator, which pandas converts to '.' internally
#df = pd.read_csv(io.StringIO(csv_byte.decode("utf-8")), sep=';', decimal=',')
###################################################################################################
# WITHOUT PROXY:
# read the csv file: ';' is the separator of columns and ',' is the decimal
# indicator, which pandas converts to '.' internally
df = pd.read_csv("https://opendata.schleswig-holstein.de/collection/windkraftanlagen/aktuell.csv",
                 sep=';', decimal=',')
###################################################################################################
# Drop unwanted columns. Comment out the columns to keep.
# You can use print(df.columns) to see all available columns.
df = df.drop(columns=['KREIS',
                      # 'GEMEINDE',
                      'TYP', 'HERSTELLER',
                      # 'NABENHOEHE',
                      # 'ROTORDURCHMESSER',
                      'SCHALLLEISTUNGSPEGEL',
                      # 'LEISTUNG',
                      'LEISTUNGSBEZUG',
                      # 'OSTWERT', 'NORDWERT', 'GENEHMIGT_AM',
                      'INBETRIEBNAHME', 'STATUS', 'BST_NR', 'ANL_NR',
                      # 'AKTENZEICHEN',
                      'DATENDATUM', 'DATENQUELLE'
                      ])
# Which column to use for the histogram
column_of_interest = "NABENHOEHE"
# print the 20 highest values of the column; head(20) — unlike
# take(range(20)) — does not raise when the dataset has fewer than 20 rows
print(df.sort_values(column_of_interest, ascending=False).head(20))
# drop all rows that contain NaN in the specified column before plotting
df_clean = df.dropna(subset=[column_of_interest])
# plot the histogram
df_clean[column_of_interest].hist(bins=25, rwidth=.9)
# Put labels on the x and y axis
plt.xlabel(column_of_interest.title())
plt.ylabel("Anzahl")
plt.savefig("histogram.png")
plt.show()
import pandas as pd
from urllib.request import urlopen, ProxyHandler, build_opener, install_opener
import json
import os
import wget

# This example creates a slideshow from pictures of monuments ("Denkmaeler")
# in Neumuenster; the URLs of the pictures come from a JSON file.

# These are proxy settings. If you are behind a proxy just comment them in
# and swap the ip and port with your proxy.
#proxy_support = ProxyHandler({"http": "http://<proxy-ip>:<proxy-port>",
#                              "https": "http://<proxy-ip>:<proxy-port>"})  # maybe you need https instead of http in the address depending on your proxy settings
#opener = build_opener(proxy_support)
#install_opener(opener)

# The url of the json file
url = "https://opendata.schleswig-holstein.de/dataset/eddb1d7e-7df3-421a-97c7-447e1b78c94c/resource/d413e41c-b13e-4984-8dbe-4725a9a188ec/download/denkmalliste.json"
# download and parse the json
response = urlopen(url)
data_json = json.loads(response.read())

# Collect the photos and the corresponding labels of all monuments in a list
foto_url_list = []
for denkmal in data_json:
    if "FotoURL" in denkmal:
        foto_url_list.append((denkmal["FotoURL"], denkmal["Bezeichnung"]))

# Store at most max_images images in the folder denkmal_imgs
# (named max_images so the builtin max() is not shadowed)
max_images = 10
os.makedirs("denkmal_imgs", exist_ok=True)
output_dir = "./denkmal_imgs/"
file_list = []
for i, (img, bezeichnung) in enumerate(foto_url_list[:max_images]):
    out_path = output_dir + "img_" + str(i).zfill(3) + ".jpg"
    labeled_path = output_dir + "img_" + str(i).zfill(3) + "_labeled.jpg"
    if os.path.exists(out_path):
        os.remove(out_path)
    # Download the image and store it in the folder denkmal_imgs
    wget.download(img, out=out_path)
    # Draw the label onto the image with ffmpeg. ffmpeg cannot safely read
    # and write the same file, so the labeled frame goes to a second file.
    # NOTE(review): bezeichnung is interpolated into a shell command; quotes
    # are stripped so the drawtext quoting cannot be broken. For fully
    # untrusted input this should use subprocess with an argument list.
    safe_text = bezeichnung.replace("'", "").replace('"', "")
    os.system(f"ffmpeg -y -i {out_path} -vf \"drawtext=text='{safe_text}':fontcolor=white:box=1:boxcolor=black@0.6:fontsize=18:x=50:y=50:\" {labeled_path}")
    file_list.append(labeled_path)

# Use ffmpeg to create a slideshow.
# Duration per displayed image, in seconds
duration_image = 2.5
# Duration of the transitions
duration_transition = 0.5
# The type of transition (alternatives: slideright, fade, smoothleft, ...,
# see https://trac.ffmpeg.org/wiki/Xfade)
transition = "smoothleft"

# One long command line is assembled and handed over to ffmpeg
create_slideshow_command = "ffmpeg"
for img in file_list:
    create_slideshow_command += f" -loop 1 -t {duration_image} -i {img}"
create_slideshow_command += " -filter_complex \""
num_images = len(file_list)
# Every image is scaled to the same resolution
for i in range(num_images):
    create_slideshow_command += f"[{i}]scale=1028:764,pad=1028:764[a{i}];"
# Chain the crossfades: each xfade blends the running result [fa..] with the
# next scaled input [a..]. This builder also produces a valid chain for
# exactly two images (the original only closed the filter-graph quote in
# its i == num-2 branch, which never runs for two inputs).
for i in range(num_images - 1):
    left = "a0" if i == 0 else f"fa{i - 1}"
    offset = (i + 1) * (duration_image - duration_transition)
    create_slideshow_command += f" [{left}][a{i + 1}]xfade=transition={transition}:duration={duration_transition}:offset={offset}[fa{i}];"
# close the filter graph and map the final crossfade output;
# -r 25 = framerate of 25 fps, libx264 as the video codec
create_slideshow_command = create_slideshow_command.rstrip(";") + "\""
create_slideshow_command += f" -map \"[fa{num_images - 2}]\" -r 25 -vcodec libx264 slideshow.mp4"
# Hand the command over to the system
os.system(create_slideshow_command)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment