Use fallback values for bsh forecast

16095bc3 · Michel Spils · e08cf546 · 16095bc3
Commit 16095bc3 authored 7 months ago by Michel Spils
--- a/src/scripts/bsh_extractor_v2.py
+++ b/src/scripts/bsh_extractor_v2.py
@@ -3,7 +3,7 @@ from io import TextIOWrapper
 from pathlib import Path
 from typing import Tuple
 from zipfile import ZipFile
+import numpy as np
 import pandas as pd
@@ -88,11 +88,18 @@ def read_dat_file(zip_file:Path,dat_file:str,tsVorh:str) -> pd.DataFrame:
        with my_zip.open(dat_file) as csv_file:
            csv_file_str = TextIOWrapper(csv_file)
-            df = pd.read_fwf(filepath_or_buffer=csv_file_str,colspecs=[(7,19),(23,26),(26,28),(40,43),(45,48)],header=None,skiprows=1,parse_dates=[0],date_format="%Y%m%d%H%M")
+            df = pd.read_fwf(filepath_or_buffer=csv_file_str,
-            df.columns= ["timestamp","stunden","minuten","gztmn","stau"]
+                             colspecs=[(7,19),(23,26),(26,28),(40,43),(43,48),(48,53),(53,58),(58,63),(63,68),(68,73)],
+                             header=None,skiprows=1,parse_dates=[0],date_format="%Y%m%d%H%M")
+            df.columns= ["timestamp","Vorhersagezeitpunkt","Differenzzeit","gztmn","stau","r1","r2","r3","r4","r5"]
            df["member"] = 0
-            df["pegel"] = df["gztmn"] + df["stau"]
+            stau_cols = ["stau","r1","r2","r3","r4","r5"]
-            df["forecast"]= df["timestamp"] + pd.to_timedelta(df["stunden"],unit="h") + pd.to_timedelta(df["minuten"],unit="m") + pd.to_timedelta(1,unit="h")
+            # Take the first value in stau_cols order that is >= -9900 (anything smaller is treated as invalid); use np.nan if no column qualifies
+            df['pegel'] = df['gztmn'] + df[stau_cols].apply(lambda row: next((x for x in row if x >= -9900),np.nan), axis=1)
+            #for col in stau_cols:
+            #    df.loc[df[col] < -9900, col] = np.nan
+            #df['pegel'] = df['gztmn'] + df[stau_cols].apply(lambda row: next((x for x in row if pd.notna(x)), 0), axis=1)
+            df["forecast"]= df["timestamp"] + pd.to_timedelta(df["Vorhersagezeitpunkt"],unit="h") + pd.to_timedelta(df["Differenzzeit"],unit="m") + pd.to_timedelta(1,unit="h")
            df["timestamp"] = tsVorh #TODO @Ralf I find this line somewhat questionable. I suspect that this time is wrong and the one IN the file is correct.