Initial version of data flagging app and some utility functions

This commit is contained in:
2024-08-16 10:20:36 +02:00
parent 0c1c0d04da
commit a017f1fb12
2 changed files with 235 additions and 0 deletions

data_flagging_app.py (new file, 175 lines added)

@@ -0,0 +1,175 @@
import sys
import os
import io
import base64

import h5py
import numpy as np
import pandas as pd

# Set up project root directory so the local 'dima' package can be imported
root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)
sys.path.append(os.path.join(root_dir, 'dima'))

import dima.src.hdf5_data_extraction as h5de
import dima.src.metadata_review_lib as ma
import dima.src.g5505_utils as utils

import dash
from dash import Dash, html, dcc, callback, Output, Input, State
import plotly.graph_objs as go
from plotly.subplots import make_subplots

import data_flagging_utils as data_flagging_utils

app = dash.Dash(__name__)
app.layout = html.Div([
    html.Div([
        dcc.Upload(
            id='upload-image',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select Files')
            ]),
            style={
                'fontSize': '16px',
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px'
            }
        )
    ]),
    html.Div([
        dcc.Dropdown(
            id='flag-options',
            options=[{'label': data_flagging_utils.flags_dict[key]['flag_description'], 'value': key}
                     for key in data_flagging_utils.flags_dict.keys()]
            # value='000'
        ),
        html.Button('Flag Selected', id='flag-button')
    ]),
    html.Div([
        dcc.Graph(
            id='timeseries-plot',
            config={
                'modeBarButtonsToAdd': ['select2d', 'lasso2d'],  # add box select and lasso select tools
                'displaylogo': False  # remove the Plotly logo from the mode bar
            }
        ),
    ]),
    dcc.Store(id='memory-output'),  # data=fig.to_dict(): store the initial figure
    html.Div(id='textarea-example-output', style={'whiteSpace': 'pre-line'})
    # html.Div(id='output-container')
])
@app.callback(Output('memory-output', 'data'),
              Output('timeseries-plot', 'figure'),
              [Input('upload-image', 'filename')],
              [Input('upload-image', 'contents')])
def load_data(filename, contents):
    data = {'data_loaded_flag': False}
    if filename and contents and filename.endswith('.h5'):
        try:
            # Decode the uploaded file and wrap it in an in-memory buffer
            content_type, content_string = contents.split(',')
            decoded = base64.b64decode(content_string)
            file_path = io.BytesIO(decoded)

            # Build the figure from the uploaded HDF5 file
            fig = data_flagging_utils.create_loaded_file_figure(file_path)

            data['data_loaded_flag'] = True
            fig.update_layout(dragmode='select',
                              activeselection=dict(fillcolor='yellow'))
            return data, fig
        except Exception as e:
            print(f"Error processing file: {e}")
            return data, dash.no_update

    return data, dash.no_update
@app.callback(
    Output('timeseries-plot', 'figure', allow_duplicate=True),
    Output('textarea-example-output', 'children'),
    Input('timeseries-plot', 'relayoutData'),
    State('timeseries-plot', 'figure'),
    State('memory-output', 'data'),
    prevent_initial_call=True
)
def update_graph(relayoutData, fig, data):
    if data is None:
        return dash.no_update, dash.no_update
    if data.get('data_loaded_flag', False) is False:
        return dash.no_update, dash.no_update

    # If fig is None or in its initial state, return no_update
    # if fig is None or not fig['data']:  # check if the figure is empty or in an initial state
    #     return dash.no_update, dash.no_update

    shapes = []
    # Handle case where relayoutData provides a selected x-axis range
    if relayoutData and 'xaxis.range[0]' in relayoutData:
        start = relayoutData['xaxis.range[0]']
        end = relayoutData['xaxis.range[1]']
    else:
        start = None  # set to default or previously known values
        end = None    # set to default or previously known values

    if start and end:
        shapes.append({
            'type': 'rect',
            'xref': 'x',
            'yref': 'paper',
            'x0': start,
            'y0': 0,
            'x1': end,
            'y1': 1,
            'fillcolor': 'rgba(128, 0, 128, 0.3)',
            'line': {'width': 0}
        })

    # Update the figure with the new shape
    fig['layout'].update(shapes=shapes)

    value = 'hola amigos'
    # return fig, 'You have entered: \n{}'.format(value)
    return dash.no_update, 'You have entered: \n{}'.format(value)
# @app.callback(
#     Output('output-container', 'children'),
#     [Input('flag-button', 'n_clicks')],
#     [State('flag-options', 'value'),
#      State('timeseries-plot', 'relayoutData')]
# )
# def flag_data(n_clicks, flag, relayoutData):
#     if n_clicks and relayoutData and 'xaxis.range[0]' in relayoutData:
#         start = relayoutData['xaxis.range[0]']
#         end = relayoutData['xaxis.range[1]']
#         print(f'Flagged with: {flag} from {start} to {end}')
#     elif n_clicks:
#         print('Please select an interval to flag.')


if __name__ == '__main__':
    app.run_server(debug=True)
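With the run guard in place, the app can be started with python data_flagging_app.py and opened at Dash's default address, typically http://127.0.0.1:8050. The flag_data callback is still commented out in this commit; what follows is a minimal sketch, not part of the commit, of how it could be wired to report the flagged interval in the page instead of printing to the console. It assumes the commented-out html.Div(id='output-container') in the layout is re-enabled and that the selection arrives as an x-axis range in relayoutData, as in the commented code above.

# Sketch only (not part of this commit): flag_data callback reporting the flagged
# interval in the 'output-container' div. Assumes that div is re-enabled in
# app.layout and that relayoutData carries the selected x-axis range.
@app.callback(
    Output('output-container', 'children'),
    Input('flag-button', 'n_clicks'),
    State('flag-options', 'value'),
    State('timeseries-plot', 'relayoutData'),
    prevent_initial_call=True
)
def flag_data(n_clicks, flag, relayoutData):
    if n_clicks and relayoutData and 'xaxis.range[0]' in relayoutData:
        start = relayoutData['xaxis.range[0]']
        end = relayoutData['xaxis.range[1]']
        return f'Flagged with: {flag} from {start} to {end}'
    return 'Please select an interval to flag.'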

data_flagging_utils.py (new file, 60 lines added)

@@ -0,0 +1,60 @@
import dima.src.hdf5_data_extraction as h5de
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# Flag codes and their meanings: flag_label 'V' marks valid data, 'I' marks invalid data
flags_dict = {
    "000": {"flag_label": 'V', "flag_description": "Valid measurement"},
    "100": {"flag_label": 'V', "flag_description": "Checked by data originator. Valid measurement, overrides any invalid flags"},
    "110": {"flag_label": 'V', "flag_description": "Episode data checked and accepted by data originator. Valid measurement"},
    "111": {"flag_label": 'V', "flag_description": "Irregular data checked and accepted by data originator. Valid measurement"},
    "456": {"flag_label": 'I', "flag_description": "Invalidated by data originator"},
    "460": {"flag_label": 'I', "flag_description": "Contamination suspected"},
    "559": {"flag_label": 'V', "flag_description": "Unspecified contamination or local influence, but considered valid"},
    "599": {"flag_label": 'I', "flag_description": "Unspecified contamination or local influence"},
    "652": {"flag_label": 'V', "flag_description": "Construction/activity nearby"},
    "659": {"flag_label": 'I', "flag_description": "Unspecified instrument/sampling anomaly"},
    "660": {"flag_label": 'V', "flag_description": "Unspecified instrument/sampling anomaly"},
    "999": {"flag_label": 'I', "flag_description": "Missing measurement, unspecified reason"}
}
def create_loaded_file_figure(file_path):

    DataOpsAPI = h5de.HDF5DataOpsManager(file_path)

    # Channel names and dataset locations are stored as compound file-level attributes
    target_channels = DataOpsAPI.file_obj.attrs['target_channels']['names'][0].decode().split(',')
    target_loc = DataOpsAPI.file_obj.attrs['target_channels']['location'][0].decode()
    diagnostic_channels = DataOpsAPI.file_obj.attrs['diagnostic_channels']['names'][0].decode().split(',')
    diagnostic_loc = DataOpsAPI.file_obj.attrs['diagnostic_channels']['location'][0].decode()

    # One subplot row per data channel; the first entry of each channel list is the time column
    fig = make_subplots(rows=(len(target_channels + diagnostic_channels) - 2), cols=1, shared_xaxes=True)
    trace_idx = 1

    dataset = DataOpsAPI.file_obj[target_loc]
    time_column = DataOpsAPI.reformat_datetime_column(target_loc, target_channels[0], '%d.%m.%Y %H:%M:%S.%f')
    for i in range(1, len(target_channels)):
        fig.add_trace(go.Scatter(x=time_column,
                                 y=dataset[target_channels[i]][:],
                                 mode='lines',
                                 name=target_channels[i]), row=trace_idx, col=1)
        trace_idx = trace_idx + 1

    dataset = DataOpsAPI.file_obj[diagnostic_loc]
    time_column = DataOpsAPI.reformat_datetime_column(diagnostic_loc, diagnostic_channels[0], '%d.%m.%Y %H:%M:%S')
    for i in range(1, len(diagnostic_channels)):
        fig.add_trace(go.Scatter(x=time_column,
                                 y=dataset[diagnostic_channels[i]][:],
                                 mode='lines',
                                 name=diagnostic_channels[i]), row=trace_idx, col=1)
        # Diagnostic channels are plotted on a logarithmic y-axis
        fig.update_yaxes(row=trace_idx, col=1, type="log")
        trace_idx = trace_idx + 1

    DataOpsAPI.close_file()
    return fig
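create_loaded_file_figure expects the uploaded HDF5 file to carry two compound file-level attributes, target_channels and diagnostic_channels, each holding a comma-separated 'names' field (whose first entry is the datetime column) and a 'location' field with the path of the corresponding table inside the file. A minimal sketch of writing such a file with h5py, assuming hypothetical dataset paths, channel names and values, purely to illustrate the expected layout:

import numpy as np
import h5py

# Hypothetical layout sketch; paths, channel names and values are made up.
attr_dtype = np.dtype([('names', 'S256'), ('location', 'S256')])

with h5py.File('example.h5', 'w') as f:
    # Table with a datetime column (first channel) followed by data channels,
    # matching the '%d.%m.%Y %H:%M:%S.%f' format parsed above.
    rows = np.array([('16.08.2024 10:20:36.000', 1.0, 2.0)],
                    dtype=[('timestamp', 'S32'), ('ch1', 'f8'), ('ch2', 'f8')])
    f.create_dataset('instrument/target_data', data=rows)

    f.attrs.create('target_channels',
                   np.array([(b'timestamp,ch1,ch2', b'instrument/target_data')],
                            dtype=attr_dtype))
    # 'diagnostic_channels' would follow the same pattern for the diagnostics table.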