# ArchiveCostWebapp/analytics/app.py (Python, 230 lines, 7.1 KiB) -- header copied from the file viewer

import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
import io
import os
# --- Constants ---
# Number of bytes in one TB (binary, 1024**4); divide a byte count by this
# value to express it in TB.
TB2B = 2 ** 40
# Location of the JSON cache; the JSON_CACHE_WITH_PATH environment variable
# overrides the built-in default path.
FILE_PATH = os.environ.get("JSON_CACHE_WITH_PATH", "/data/json_cache.json")
# Price per TB in CHF.
PRICE_PER_TB = 8.1
# --- Page configuration ---
# Request Streamlit's "wide" page layout for the whole app.
st.set_page_config(layout="wide")
# --- Load data ---
@st.cache_data
def load_data():
    """Load the archive usage records from the JSON cache.

    Reads ``FILE_PATH`` with ``pandas.read_json``. If reading fails for any
    reason (for example because the file does not exist), a small dummy data
    set is returned instead so the app can still be run for testing. The
    fallback is logged as a warning so that dummy figures are never shown
    without a trace of why.

    Returns:
        pandas.DataFrame: The parsed cache, or two dummy rows with the
        columns ``ownerGroup``, ``department``, ``size`` and ``packedSize``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import logging

    # NOTE(review): the result is cached by ``st.cache_data`` without a ttl,
    # so a dummy fallback presumably stays cached until the cache is cleared
    # or the app restarts -- confirm this is intended.
    try:
        return pd.read_json(FILE_PATH)
    except Exception:
        # Deliberate best-effort: keep the app usable, but record the failure.
        logging.getLogger(__name__).warning(
            "Could not read JSON cache %s; falling back to dummy data.",
            FILE_PATH,
            exc_info=True,
        )
        return pd.DataFrame([
            {"ownerGroup": "a-123", "department": "4000", "size": 500000, "packedSize": 400000},
            {"ownerGroup": "p9999", "department": "6000", "size": 1200000, "packedSize": 1000000}
        ])
df = load_data()

# --- Session state initialisation ---
# ``depts`` is always defined (possibly empty) so the views below can rely on
# it even when the data has no "department" column.
depts = []
if "department" in df.columns:
    # Missing departments can never match the equality filter used by the
    # department view, so they are left out of the selectable list.
    _dept_values = df["department"].dropna().unique().tolist()
    try:
        depts = sorted(_dept_values)
    except TypeError:
        # Mixed value types (e.g. numbers and strings) cannot be compared
        # with each other; order them by their text form instead.
        depts = sorted(_dept_values, key=str)
    # Remember the selected department across reruns; start with the first one.
    if 'gewaehltes_dept' not in st.session_state:
        st.session_state.gewaehltes_dept = depts[0] if depts else None
# --- Navigation (sidebar) ---
# The radio widget's own label is collapsed; the markdown line above it acts
# as the visible prompt. ``auswahl`` holds the name of the chosen view.
view_names = [
    "Übersicht & Metriken",
    "Bereichs-Analyse",
    "Nicht zuweisbare Daten",
    "Rohdaten",
]
st.sidebar.title("Navigation")
st.sidebar.markdown("Wählen Sie eine Ansicht:")
auswahl = st.sidebar.radio(
    "Ansichten",
    view_names,
    label_visibility="collapsed",
)
# --- Main view ---
if auswahl == "Übersicht & Metriken":
    # Overview: headline totals on the left, per-department table with
    # CSV/XLSX export on the right, and a pie chart below.
    st.title("Übersicht")

    # Totals over all rows; sizes are byte counts and are displayed in TB.
    total_vol = df["size"].sum()
    total_packed_vol = df["packedSize"].sum()

    col1, col2 = st.columns(2)
    col1.metric("Gesamtvolumen (TB)", f'{total_vol/TB2B:.2f}')
    col1.metric("Gesamtes packetiertes Volumen (TB)", f'{total_packed_vol/TB2B:.2f}')
    col1.metric("Anzahl Archivgruppen", len(df))

    # The per-department table, its exports and the pie chart all need the
    # "department" column. Guard them together so a data set without that
    # column still shows the totals instead of failing with a KeyError.
    if "department" in df:
        packed_bytes = df.groupby('department')['packedSize'].sum()
        df_department_overview = pd.DataFrame({
            # Packed volume per department, converted from bytes to TB.
            "Totales packetiertes Volumen [TB]": packed_bytes / TB2B,
            # Share of the overall packed volume in percent.
            "Anteil [%]": 100 * packed_bytes / total_packed_vol,
            # Cost derived from the packed volume in TB.
            "Kosten [CHF]": (packed_bytes / TB2B) * PRICE_PER_TB,
        })
        df_department_overview.index.name = "Bereich"

        col2.table(
            df_department_overview.style.format({
                "Totales packetiertes Volumen [TB]": "{:.4f}",
                "Anteil [%]": "{:.2f}",
                "Kosten [CHF]": "{:.2f}"
            })
        )

        # --- Export buttons ---
        export_col1, export_col2 = col2.columns(2)

        # CSV export (unformatted values, department as first column).
        csv_data = df_department_overview.to_csv(index=True).encode('utf-8')
        export_col1.download_button(
            label="Export als CSV",
            data=csv_data,
            file_name="department_overview.csv",
            mime="text/csv",
        )

        # Excel export: write the workbook into an in-memory buffer and hand
        # its bytes to the download button.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_department_overview.to_excel(
                writer, index=True, sheet_name='Department Overview'
            )
        excel_data = excel_buffer.getvalue()
        export_col2.download_button(
            label="Export als XLSX",
            data=excel_data,
            file_name="department_overview.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

        # Pie chart of the packed volume per department.
        fig = px.pie(
            df,
            names="department",
            values="packedSize",
            title="Verteilung nach Bereichen"
        )
        fig.update_layout(legend=dict(x=0.7, y=0.5))
        st.plotly_chart(fig)
elif auswahl == "Bereichs-Analyse":
st.title("Bereichs-Analyse")
if "department" in df and depts:
try:
default_index = depts.index(st.session_state.gewaehltes_dept)
except ValueError:
default_index = 0
neue_auswahl = st.selectbox(
"Bereich auswählen:",
depts,
index=default_index
)
st.session_state.gewaehltes_dept = neue_auswahl
filtered_df = df[df["department"] == st.session_state.gewaehltes_dept].copy()
if "copies" in filtered_df.columns:
conditions = [
filtered_df['copies'].str.startswith('one', na=False),
filtered_df['copies'].str.startswith('two', na=False)
]
choices = [1, 2]
filtered_df['copies'] = np.select(
conditions, choices, default=filtered_df['copies']
)
if "packedSize" in filtered_df.columns:
filtered_df = filtered_df.sort_values(
by="packedSize", ascending=False
)
rename_mapping = {
"ownerGroup": "Gruppe",
"copies": "Anzahl Kopien",
"size": "unpacketierte Grösse",
"packedSize": "packetierte Grösse",
"beamline": "Beamline",
"department": "Bereich"
}
filtered_df.rename(columns=rename_mapping, inplace=True)
if "packetierte Grösse" in filtered_df.columns:
filtered_df["Kosten [CHF]"] = \
(filtered_df["packetierte Grösse"] / TB2B) * PRICE_PER_TB
st.metric(
f"Anzahl Gruppen in Department {st.session_state.gewaehltes_dept}",
len(filtered_df)
)
st.dataframe(filtered_df, use_container_width=True, hide_index=True)
elif auswahl == "Nicht zuweisbare Daten":
st.title("Nicht zuweisbare Daten")
no_department_df = df[
pd.to_numeric(df['department'], errors='coerce').isna()
].copy()
no_department_vol = no_department_df['packedSize'].sum()
st.write(f'Gesamtvolumen: {no_department_vol/TB2B:.2f} TB')
st.write(f'Anzahl Gruppen: {len(no_department_df)}')
no_department_df = no_department_df.sort_values(
by="packedSize", ascending=False
)
no_department_df['unpacketierte Grösse [GB]'] = \
no_department_df['size'] / (TB2B / 1024)
no_department_df['packetierte Grösse [GB]'] = \
no_department_df['packedSize'] / (TB2B / 1024)
no_department_df["Kosten [CHF]"] = \
(no_department_df["packedSize"] / TB2B) * PRICE_PER_TB
no_department_df.drop(
columns=['size', 'packedSize', 'beamline'], inplace=True
)
if "copies" in no_department_df.columns:
conditions = [
no_department_df['copies'].str.startswith('one', na=False),
no_department_df['copies'].str.startswith('two', na=False)
]
choices = [1, 2]
no_department_df['copies'] = np.select(
conditions, choices, default=no_department_df['copies']
)
rename_mapping = {
"ownerGroup": "Gruppe",
"copies": "Anzahl Kopien",
"department": "Bereich"
}
no_department_df.rename(columns=rename_mapping, inplace=True)
st.table(no_department_df.reset_index(drop=True))
elif auswahl == "Rohdaten":
st.title("Rohdaten")
st.write("Durchsuchen und filtern Sie die Gruppen. Grössenangaben in Bytes.")
st.dataframe(df, use_container_width=True)