# Streamlit dashboard for archive storage usage: shows total volumes, a
# per-department breakdown with cost estimate, groups that cannot be assigned
# to a department, and the raw records.
import io

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st

# --- Constants ---
TB2B = 1024**4  # bytes per TB (1024^4); divide a byte count by this to get TB
FILE_PATH = "/data/json_cache.json"
PRICE_PER_TB = 8.1  # CHF / TB

# --- Page configuration ---
st.set_page_config(layout="wide")


# --- Data loading ---
@st.cache_data
def load_data():
    """Load the records from the JSON cache at FILE_PATH.

    Returns:
        A DataFrame read from FILE_PATH. If the file cannot be opened or
        cannot be parsed as JSON, a two-row dummy DataFrame (columns
        ownerGroup, department, size, packedSize) is returned instead so the
        app can be exercised without the real cache.
    """
    try:
        return pd.read_json(FILE_PATH)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file, ValueError covers
        # malformed JSON; anything else is a real bug and should surface.
        return pd.DataFrame([
            {"ownerGroup": "a-123", "department": "4000",
             "size": 500000, "packedSize": 400000},
            {"ownerGroup": "p9999", "department": "6000",
             "size": 1200000, "packedSize": 1000000},
        ])


# --- Helpers ---
def _department_options(frame):
    """Return the distinct, non-missing department values of *frame*, sorted.

    Returns an empty list when the frame has no "department" column.
    """
    if "department" not in frame.columns:
        return []
    values = frame["department"].dropna().unique().tolist()
    # Non-strings sort before strings so a column mixing numbers and text
    # does not raise TypeError; homogeneous columns keep their natural order.
    return sorted(values, key=lambda value: (isinstance(value, str), value))


def _copies_as_count(copies):
    """Map "one..."/"two..." entries of *copies* to 1/2, keep everything else.

    The comparison runs on a string view of the column so that columns which
    are already numeric (or contain missing values) pass through unchanged
    instead of raising on the ``.str`` accessor.
    """
    as_text = copies.astype(str)
    return np.select(
        [as_text.str.startswith("one"), as_text.str.startswith("two")],
        [1, 2],
        default=copies,
    )


def _cost_chf(packed_bytes):
    """Return the cost in CHF for a packed size given in bytes."""
    return (packed_bytes / TB2B) * PRICE_PER_TB


def _build_department_overview(frame, total_packed_vol):
    """Aggregate packed volume, share and cost per department.

    Args:
        frame: records with "department" and "packedSize" columns.
        total_packed_vol: sum of "packedSize" over all records, in bytes.

    Returns:
        A DataFrame indexed by department ("Bereich") with the packed volume
        in TB, the share of the total in percent and the cost in CHF.
    """
    packed = frame.groupby("department")["packedSize"].sum()
    if total_packed_vol:
        share = 100 * packed / total_packed_vol
    else:
        # Nothing stored at all: report 0 % instead of NaN/inf.
        share = packed * 0.0
    overview = pd.DataFrame({
        "Totales packetiertes Volumen [TB]": packed / TB2B,
        "Anteil [%]": share,
        "Kosten [CHF]": _cost_chf(packed),
    })
    overview.index.name = "Bereich"
    return overview


# --- Views ---
def _render_overview(frame):
    """Render total metrics, the per-department table, exports and pie chart."""
    st.title("Übersicht")

    total_vol = frame["size"].sum()
    total_packed_vol = frame["packedSize"].sum()

    col1, col2 = st.columns(2)
    col1.metric("Gesamtvolumen (TB)", f'{total_vol/TB2B:.2f}')
    col1.metric(
        "Gesamtes packetiertes Volumen (TB)", f'{total_packed_vol/TB2B:.2f}'
    )
    col1.metric("Anzahl Archivgruppen", len(frame))

    # Everything below is a per-department breakdown; without the column
    # only the totals above can be shown.
    if "department" not in frame.columns:
        return

    overview = _build_department_overview(frame, total_packed_vol)
    col2.table(
        overview.style.format({
            "Totales packetiertes Volumen [TB]": "{:.4f}",
            "Anteil [%]": "{:.2f}",
            "Kosten [CHF]": "{:.2f}",
        })
    )

    # --- Export buttons ---
    export_col1, export_col2 = col2.columns(2)

    # CSV export
    csv_data = overview.to_csv(index=True).encode('utf-8')
    export_col1.download_button(
        label="Export als CSV",
        data=csv_data,
        file_name="department_overview.csv",
        mime="text/csv",
    )

    # Excel export, written into an in-memory buffer
    excel_buffer = io.BytesIO()
    with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
        overview.to_excel(
            writer, index=True, sheet_name='Department Overview'
        )
    export_col2.download_button(
        label="Export als XLSX",
        data=excel_buffer.getvalue(),
        file_name="department_overview.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    fig = px.pie(
        frame,
        names="department",
        values="packedSize",
        title="Verteilung nach Bereichen",
    )
    fig.update_layout(legend=dict(x=0.7, y=0.5))
    st.plotly_chart(fig)


def _render_department_analysis(frame, departments):
    """Render the group table for one department chosen via a select box.

    The chosen department is kept in ``st.session_state.gewaehltes_dept`` so
    it is remembered when the user switches views.
    """
    st.title("Bereichs-Analyse")

    if "department" not in frame.columns or not departments:
        return

    try:
        default_index = departments.index(
            st.session_state.get("gewaehltes_dept")
        )
    except ValueError:
        # Remembered department no longer exists in the data.
        default_index = 0

    st.session_state.gewaehltes_dept = st.selectbox(
        "Bereich auswählen:", departments, index=default_index
    )

    selected = frame["department"] == st.session_state.gewaehltes_dept
    filtered_df = frame[selected].copy()

    if "copies" in filtered_df.columns:
        filtered_df["copies"] = _copies_as_count(filtered_df["copies"])

    if "packedSize" in filtered_df.columns:
        filtered_df = filtered_df.sort_values(by="packedSize", ascending=False)

    filtered_df.rename(
        columns={
            "ownerGroup": "Gruppe",
            "copies": "Anzahl Kopien",
            "size": "unpacketierte Grösse",
            "packedSize": "packetierte Grösse",
            "beamline": "Beamline",
            "department": "Bereich",
        },
        inplace=True,
    )

    if "packetierte Grösse" in filtered_df.columns:
        filtered_df["Kosten [CHF]"] = _cost_chf(
            filtered_df["packetierte Grösse"]
        )

    st.metric(
        f"Anzahl Gruppen in Department {st.session_state.gewaehltes_dept}",
        len(filtered_df),
    )
    st.dataframe(filtered_df, use_container_width=True, hide_index=True)


def _render_unassigned(frame):
    """Render the groups whose department is not a number."""
    st.title("Nicht zuweisbare Daten")

    if "department" in frame.columns:
        is_unassigned = pd.to_numeric(
            frame["department"], errors="coerce"
        ).isna()
    else:
        # No department information at all: nothing can be assigned.
        is_unassigned = pd.Series(True, index=frame.index)
    no_department_df = frame[is_unassigned].copy()

    no_department_vol = no_department_df['packedSize'].sum()
    st.write(f'Gesamtvolumen: {no_department_vol/TB2B:.2f} TB')
    st.write(f'Anzahl Gruppen: {len(no_department_df)}')

    no_department_df = no_department_df.sort_values(
        by="packedSize", ascending=False
    )

    # TB2B / 1024 is the number of bytes per GB.
    no_department_df['unpacketierte Grösse [GB]'] = (
        no_department_df['size'] / (TB2B / 1024)
    )
    no_department_df['packetierte Grösse [GB]'] = (
        no_department_df['packedSize'] / (TB2B / 1024)
    )
    no_department_df["Kosten [CHF]"] = _cost_chf(no_department_df["packedSize"])

    # "beamline" is optional (the dummy data has none), so a missing column
    # must not abort the view.
    no_department_df.drop(
        columns=['size', 'packedSize', 'beamline'],
        inplace=True,
        errors="ignore",
    )

    if "copies" in no_department_df.columns:
        no_department_df["copies"] = _copies_as_count(
            no_department_df["copies"]
        )

    no_department_df.rename(
        columns={
            "ownerGroup": "Gruppe",
            "copies": "Anzahl Kopien",
            "department": "Bereich",
        },
        inplace=True,
    )

    st.table(no_department_df.reset_index(drop=True))


def _render_raw(frame):
    """Render the unmodified records (sizes in bytes)."""
    st.title("Rohdaten")
    st.write("Durchsuchen und filtern Sie die Gruppen. Grössenangaben in Bytes.")
    st.dataframe(frame, use_container_width=True)


df = load_data()

# --- Session state initialisation ---
depts = _department_options(df)
if "department" in df.columns and 'gewaehltes_dept' not in st.session_state:
    st.session_state.gewaehltes_dept = depts[0] if depts else None

# --- Navigation (sidebar) ---
st.sidebar.title("Navigation")
st.sidebar.markdown("Wählen Sie eine Ansicht:")

auswahl = st.sidebar.radio(
    label="Ansichten",
    options=[
        "Übersicht & Metriken",
        "Bereichs-Analyse",
        "Nicht zuweisbare Daten",
        "Rohdaten",
    ],
    label_visibility="collapsed",
)

# --- Main view ---
if auswahl == "Übersicht & Metriken":
    _render_overview(df)
elif auswahl == "Bereichs-Analyse":
    _render_department_analysis(df, depts)
elif auswahl == "Nicht zuweisbare Daten":
    _render_unassigned(df)
elif auswahl == "Rohdaten":
    _render_raw(df)