Initial version of data flagging app and some utility functions

This commit is contained in:
2024-08-16 10:20:36 +02:00
parent 0c1c0d04da
commit a017f1fb12
2 changed files with 235 additions and 0 deletions

data_flagging_app.py (new file, 175 lines added)

@@ -0,0 +1,175 @@
import sys
import os
import io
import base64

import h5py
import numpy as np
import pandas as pd

# Set up project root directory so the local 'dima' package can be imported
root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)
sys.path.append(os.path.join(root_dir, 'dima'))

import dima.src.hdf5_data_extraction as h5de
import dima.src.metadata_review_lib as ma
import dima.src.g5505_utils as utils

import dash
from dash import Dash, html, dcc, callback, Output, Input, State
import plotly.graph_objs as go
from plotly.subplots import make_subplots

import data_flagging_utils as data_flagging_utils

app = dash.Dash(__name__)
app.layout = html.Div([
    html.Div([
        dcc.Upload(
            id='upload-image',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select Files')
            ]),
            style={
                'fontSize': '16px',
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px'
            }
        )
    ]),
    html.Div([
        dcc.Dropdown(
            id='flag-options',
            options=[{'label': data_flagging_utils.flags_dict[key]['flag_description'], 'value': key}
                     for key in data_flagging_utils.flags_dict.keys()]
            # value='000'
        ),
        html.Button('Flag Selected', id='flag-button')
    ]),
    html.Div([
        dcc.Graph(
            id='timeseries-plot',
            config={
                'modeBarButtonsToAdd': ['select2d', 'lasso2d'],  # add box select and lasso select tools
                'displaylogo': False  # remove the Plotly logo from the mode bar
            }
        ),
    ]),
    dcc.Store(id='memory-output'),  # data=fig.to_dict(): store the initial figure
    html.Div(id='textarea-example-output', style={'whiteSpace': 'pre-line'})
    # html.Div(id='output-container')
])
@app.callback(Output('memory-output', 'data'),
              Output('timeseries-plot', 'figure'),
              [Input('upload-image', 'filename')],
              [Input('upload-image', 'contents')])
def load_data(filename, contents):
    data = {'data_loaded_flag': False}
    if filename and contents and filename.endswith('.h5'):
        try:
            # Decode the uploaded file and wrap it in an in-memory buffer
            content_type, content_string = contents.split(',')
            decoded = base64.b64decode(content_string)
            file_path = io.BytesIO(decoded)

            # Build the figure from the uploaded HDF5 file
            fig = data_flagging_utils.create_loaded_file_figure(file_path)

            data['data_loaded_flag'] = True
            fig.update_layout(dragmode='select',
                              activeselection=dict(fillcolor='yellow'))
            return data, fig
        except Exception as e:
            print(f"Error processing file: {e}")
            return data, dash.no_update

    return data, dash.no_update
@app.callback(
    Output('timeseries-plot', 'figure', allow_duplicate=True),
    Output('textarea-example-output', 'children'),
    Input('timeseries-plot', 'relayoutData'),
    State('timeseries-plot', 'figure'),
    State('memory-output', 'data'),
    prevent_initial_call=True
)
def update_graph(relayoutData, fig, data):
    if data is None:
        return dash.no_update, dash.no_update
    if data.get('data_loaded_flag', False) is False:
        return dash.no_update, dash.no_update

    # If fig is None or in its initial state, return no_update
    # if fig is None or not fig['data']:  # check if the figure is empty or in an initial state
    #     return dash.no_update, dash.no_update

    shapes = []
    # Handle case where relayoutData provides a selected x-axis range
    if relayoutData and 'xaxis.range[0]' in relayoutData:
        start = relayoutData['xaxis.range[0]']
        end = relayoutData['xaxis.range[1]']
    else:
        start = None  # set to default or previously known values
        end = None    # set to default or previously known values

    if start and end:
        shapes.append({
            'type': 'rect',
            'xref': 'x',
            'yref': 'paper',
            'x0': start,
            'y0': 0,
            'x1': end,
            'y1': 1,
            'fillcolor': 'rgba(128, 0, 128, 0.3)',
            'line': {'width': 0}
        })

    # Update the figure with the new shape
    fig['layout'].update(shapes=shapes)

    value = 'hola amigos'
    # return fig, 'You have entered: \n{}'.format(value)
    return dash.no_update, 'You have entered: \n{}'.format(value)
# @app.callback(
#     Output('output-container', 'children'),
#     [Input('flag-button', 'n_clicks')],
#     [State('flag-options', 'value'),
#      State('timeseries-plot', 'relayoutData')]
# )
# def flag_data(n_clicks, flag, relayoutData):
#     if n_clicks and relayoutData and 'xaxis.range[0]' in relayoutData:
#         start = relayoutData['xaxis.range[0]']
#         end = relayoutData['xaxis.range[1]']
#         print(f'Flagged with: {flag} from {start} to {end}')
#     elif n_clicks:
#         print('Please select an interval to flag.')


if __name__ == '__main__':
    app.run_server(debug=True)
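With the run guard in place, the app can be started with python data_flagging_app.py and opened at Dash's default address, typically http://127.0.0.1:8050. The flag_data callback is still commented out in this commit; what follows is a minimal sketch, not part of the commit, of how it could be wired to report the flagged interval in the page instead of printing to the console. It assumes the commented-out html.Div(id='output-container') in the layout is re-enabled and that the selection arrives as an x-axis range in relayoutData, as in the commented code above.

# Sketch only (not part of this commit): flag_data callback reporting the flagged
# interval in the 'output-container' div. Assumes that div is re-enabled in
# app.layout and that relayoutData carries the selected x-axis range.
@app.callback(
    Output('output-container', 'children'),
    Input('flag-button', 'n_clicks'),
    State('flag-options', 'value'),
    State('timeseries-plot', 'relayoutData'),
    prevent_initial_call=True
)
def flag_data(n_clicks, flag, relayoutData):
    if n_clicks and relayoutData and 'xaxis.range[0]' in relayoutData:
        start = relayoutData['xaxis.range[0]']
        end = relayoutData['xaxis.range[1]']
        return f'Flagged with: {flag} from {start} to {end}'
    return 'Please select an interval to flag.'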

data_flagging_utils.py (new file, 60 lines added)

@@ -0,0 +1,60 @@
import dima.src.hdf5_data_extraction as h5de
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# Flag codes and their meanings: flag_label 'V' marks valid data, 'I' marks invalid data
flags_dict = {
    "000": {"flag_label": 'V', "flag_description": "Valid measurement"},
    "100": {"flag_label": 'V', "flag_description": "Checked by data originator. Valid measurement, overrides any invalid flags"},
    "110": {"flag_label": 'V', "flag_description": "Episode data checked and accepted by data originator. Valid measurement"},
    "111": {"flag_label": 'V', "flag_description": "Irregular data checked and accepted by data originator. Valid measurement"},
    "456": {"flag_label": 'I', "flag_description": "Invalidated by data originator"},
    "460": {"flag_label": 'I', "flag_description": "Contamination suspected"},
    "559": {"flag_label": 'V', "flag_description": "Unspecified contamination or local influence, but considered valid"},
    "599": {"flag_label": 'I', "flag_description": "Unspecified contamination or local influence"},
    "652": {"flag_label": 'V', "flag_description": "Construction/activity nearby"},
    "659": {"flag_label": 'I', "flag_description": "Unspecified instrument/sampling anomaly"},
    "660": {"flag_label": 'V', "flag_description": "Unspecified instrument/sampling anomaly"},
    "999": {"flag_label": 'I', "flag_description": "Missing measurement, unspecified reason"}
}
def create_loaded_file_figure(file_path):

    DataOpsAPI = h5de.HDF5DataOpsManager(file_path)

    # Channel names and dataset locations are stored as compound file-level attributes
    target_channels = DataOpsAPI.file_obj.attrs['target_channels']['names'][0].decode().split(',')
    target_loc = DataOpsAPI.file_obj.attrs['target_channels']['location'][0].decode()
    diagnostic_channels = DataOpsAPI.file_obj.attrs['diagnostic_channels']['names'][0].decode().split(',')
    diagnostic_loc = DataOpsAPI.file_obj.attrs['diagnostic_channels']['location'][0].decode()

    # One subplot row per data channel; the first entry of each channel list is the time column
    fig = make_subplots(rows=(len(target_channels + diagnostic_channels) - 2), cols=1, shared_xaxes=True)
    trace_idx = 1

    dataset = DataOpsAPI.file_obj[target_loc]
    time_column = DataOpsAPI.reformat_datetime_column(target_loc, target_channels[0], '%d.%m.%Y %H:%M:%S.%f')
    for i in range(1, len(target_channels)):
        fig.add_trace(go.Scatter(x=time_column,
                                 y=dataset[target_channels[i]][:],
                                 mode='lines',
                                 name=target_channels[i]), row=trace_idx, col=1)
        trace_idx = trace_idx + 1

    dataset = DataOpsAPI.file_obj[diagnostic_loc]
    time_column = DataOpsAPI.reformat_datetime_column(diagnostic_loc, diagnostic_channels[0], '%d.%m.%Y %H:%M:%S')
    for i in range(1, len(diagnostic_channels)):
        fig.add_trace(go.Scatter(x=time_column,
                                 y=dataset[diagnostic_channels[i]][:],
                                 mode='lines',
                                 name=diagnostic_channels[i]), row=trace_idx, col=1)
        # Diagnostic channels are plotted on a logarithmic y-axis
        fig.update_yaxes(row=trace_idx, col=1, type="log")
        trace_idx = trace_idx + 1

    DataOpsAPI.close_file()
    return fig
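create_loaded_file_figure expects the uploaded HDF5 file to carry two compound file-level attributes, target_channels and diagnostic_channels, each holding a comma-separated 'names' field (whose first entry is the datetime column) and a 'location' field with the path of the corresponding table inside the file. A minimal sketch of writing such a file with h5py, assuming hypothetical dataset paths, channel names and values, purely to illustrate the expected layout:

import numpy as np
import h5py

# Hypothetical layout sketch; paths, channel names and values are made up.
attr_dtype = np.dtype([('names', 'S256'), ('location', 'S256')])

with h5py.File('example.h5', 'w') as f:
    # Table with a datetime column (first channel) followed by data channels,
    # matching the '%d.%m.%Y %H:%M:%S.%f' format parsed above.
    rows = np.array([('16.08.2024 10:20:36.000', 1.0, 2.0)],
                    dtype=[('timestamp', 'S32'), ('ch1', 'f8'), ('ch2', 'f8')])
    f.create_dataset('instrument/target_data', data=rows)

    f.attrs.create('target_channels',
                   np.array([(b'timestamp,ch1,ch2', b'instrument/target_data')],
                            dtype=attr_dtype))
    # 'diagnostic_channels' would follow the same pattern for the diagnostics table.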