diff --git a/app/data_flagging_app.py b/app/data_flagging_app.py index 2f24f05..5e8d640 100644 --- a/app/data_flagging_app.py +++ b/app/data_flagging_app.py @@ -1,822 +1,822 @@ -import sys, os - -try: - thisFilePath = os.path.abspath(__file__) - print(thisFilePath) -except NameError: - print("[Notice] The __file__ attribute is unavailable in this environment (e.g., Jupyter or IDLE).") - print("When using a terminal, make sure the working directory is set to the script's location to prevent path issues (for the DIMA submodule)") - #print("Otherwise, path to submodule DIMA may not be resolved properly.") - thisFilePath = os.getcwd() # Use current directory or specify a default - -projectPath = os.path.normpath(os.path.join(thisFilePath, "..","..")) - -print(projectPath) - -if not projectPath in sys.path: - sys.path.insert(0,projectPath) - -#print(dimaPath) -import pandas as pd -import numpy as np - -import base64 -import dash -import io - -# Set up project root directory -#root_dir = os.path.abspath(os.curdir) -#sys.path.append(root_dir) -#sys.path.append(os.path.join(root_dir,'dima')) - - -import data_flagging_utils as data_flagging_utils - -from dash import Dash, html, dcc, callback, Output, Input, State, dash_table -import plotly.graph_objs as go -from plotly.subplots import make_subplots -import dash_bootstrap_components as dbc -import json - -import dima.src.hdf5_ops as hdf5_ops -#import dima.instruments.readers.filereader_registry as filereader_registry -#import instruments_.readers.flag_reader as flag_reader - -#filereader_registry.file_extensions.append('.json') -#filereader_registry.file_readers.update({'ACSM_TOFWARE_flags_json' : lambda x: flag_reader.read_jsonflag_as_dict(x)}) - -import threading -import webbrowser -from time import sleep - -EnableVisCheckbox = dbc.Col(dbc.Row([dbc.Col(dcc.Checklist( - id='enable-flag-checkbox', - options=[{'label': html.Span('Enable Flag Visualization', style={'font-size': 15, 'padding-left': 10}), 'value': True}], - 
value=[], - inline=True),width=6), - dbc.Col(dbc.Button("Load Flags", id='load-flags-button', color='primary'),width=4)], - justify="center", align="center"), - width=12) - -FlagVisTable = html.Div(dash_table.DataTable(data=[], - columns=[{"name": i, "id": i} for i in ['id','startdate','enddate','flag_description','parent_ch_pos','parent_channel']], - id='tbl', - style_header={'textAlign': 'center'}, - fixed_rows={'headers': True}, # Fixed table headers - style_table={'height': '1000px'}, # Make table scrollable - style_cell={'textAlign': 'left', 'padding': '10px'}, # Cell styling - ), - style={ - 'background-color': '#f0f0f0', # Background color for the table - #'height': '1000px', # Match the table's height - 'padding': '5px', # Optional padding around the table - 'border': '1px solid #ccc', # Optional border around the background - } ) - -ReviewOpsPannel = dbc.Col([ - # Row 1 - dbc.Row([html.H2("Flagging workflow pannel", style={'font-size': 20})]), - - - # Row 2 - dbc.Row([ - #dbc.Col(html.Div("Review Status"), width=6), - dcc.Checklist( - id='flag-review-status-checklist', - options=[ - {'label': [html.Span("Verify Flags", style={'font-size': 15, 'padding-left': 2})], 'value': 'will review'}, - {'label': [html.Span("Ready to Record Flags", style={'font-size': 15, 'padding-left': 2})], 'value': 'will transfer'}, - {'label': [html.Span("Finalize Flagging", style={'font-size': 15, 'padding-left': 2})], 'value': 'will apply'} - ], - value=[], - #inline=True, - style={ - "display": "flex", # Flexbox for left alignment - "flexDirection": "column", # Arrange the items vertically - "alignItems": "flex-start" # Align the items to the left - } - ), - ]), - - # Row 3 - dbc.Row([ - #dbc.Col(dbc.Button("Load Flags", id='button-1', color='primary'),width=4), - dbc.Col(dbc.Button("Delete Flag", id='delete-flag-button', color='primary'),width=4), - dbc.Col(dbc.Button("Record Flags", id='button-2', color='primary'),width=4), - dbc.Col(dbc.Button("Apply Flags", id='button-3', 
color='primary'),width=4)], - justify="center", align="center"), - - # Row 4 - #dbc.Row([ - # dbc.Col(html.Div("Apply Flags"), width=6), - # dbc.Col(dbc.Button("Button 2", id='button-2', color='secondary'), width=6), - #]), - ],width=12) - -# Initialize Dash app with Bootstrap theme -app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) - -#df = pd.DataFrame.empty() - -app.layout = dbc.Container([ - html.Div(children=[ - html.Div(children=[ - html.H1('QC/QA Data Flagging App'), - html.H6('All measurements are assumed valid unless checked otherwise.') - ] - )],style={'textAlign': 'center'}), - dbc.Row([ - - dbc.Col([ - dcc.Upload( - id='upload-image', - children=html.Div(['Drag and Drop or ',html.A('Select Files')]), - style={ - 'fontSize': "16px", - 'width': '100%', - 'height': '60px', - 'lineHeight': '60px', - 'borderWidth': '1px', - 'borderStyle': 'dashed', - 'borderRadius': '5px', - 'textAlign': 'center', - 'margin': '10px' - }), - dcc.Dropdown( - id='flag-options', - options= data_flagging_utils.filter_flags_by_label(data_flagging_utils.flags_dict,'I'), # displays only flags to invalidate - )], - width=12 - ), - #],justify="center", align="center"), - - #dbc.Row([ - dbc.Col([dbc.Button('Create Flag', id='flag-button', color="primary", className="mt-2")],width=2), - dbc.Col([dbc.Button('Reset Flag', id='reset-flag-button', color="secondary", className="mt-2")],width=2), - dbc.Col([dbc.Button('Commit Flag', id='commit-flag-button', color="secondary", className="mt-2")],width=2) - ], justify="center", align="center",style={'background-color': '#f8f9fa', 'padding': '20px', 'text-align': 'center'}), - - dbc.Row([ - html.H3("Instrument Dashboard"), - - # First Dropdown (Instrument Folders) - dcc.Dropdown( - id="instrument-dropdown", - options=[{"label": i, "value": i} for i in []], - placeholder="Select an Instrument Folder", - ), - - # Spinner wrapping the second and third dropdowns - dcc.Loading( - type="circle", # Spinner style - children=[ - # 
Second Dropdown (Files) - dcc.Dropdown( - id="file-dropdown", - placeholder="Select a File", - disabled=True # Initially disabled - ), - - # Third Dropdown (Sub-selection) - dcc.Dropdown( - id="sub-dropdown", - placeholder="Select Variables", - multi = True, - disabled=True - ) - ] - ) - ], - justify="center", align="center",style={'background-color': '#f8f9fa', 'padding': '20px', 'text-align': 'center'}), - - dbc.Row([ - dbc.Col([ - html.Div([ - html.Div(id='flag-mode-title', style={'whiteSpace': 'pre-line'}), - dcc.Loading( - type="circle", # Spinner style - children=[ - dcc.Graph(id='timeseries-plot', - style={'height': '1200px','width' : '100%'})]) - ], - style={'height': '1000px', 'overflowY': 'auto'}) - ], - width=8, - style={'background-color': '#e9ecef', 'padding': '20px', 'text-align': 'center','height': '1000px'}), - #dbc.Col([html.Div(id='flag-record', style={'whiteSpace': 'pre-line'})], width=4), #config={'modeBarButtons': True, - #'modeBarButtonsToAdd':['select2d','lasso2d'], - #'modeBarButtonsToRemove': ['zoom', 'pan']}),], width=12) - dbc.Col([ - html.Div([ - EnableVisCheckbox, - FlagVisTable, - ReviewOpsPannel, - ], - style={'height': '1000px','overflowY': 'auto'}), # Set a fixed height for the div - ], - - width=4, - style={'background-color': '#dee2e6', 'padding': '20px', 'text-align': 'center','height': '1000px'},) - - ],justify="center", align="center"), - - dbc.Row([ # row 3 - dbc.Col([ - dcc.Store(id='memory-output'), - html.Div(id='textarea-example-output', style={'whiteSpace': 'pre-line'}) - ], width=12) - ],justify="center", align="center"), -], -) - -#@app.callback() - -@app.callback( - Output('memory-output','data', allow_duplicate=True), - Output("instrument-dropdown", "options"), - Output("instrument-dropdown", "disabled"), - [Input('upload-image','filename'), - Input('upload-image','contents')], - prevent_initial_call=True -) -def load_data(filename, contents): - data = {'data_loaded_flag': False} - if filename and contents and 
filename.endswith('.h5'): - - try: - path_to_file = data_flagging_utils.save_file(filename,contents) - - DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file) - DataOps.load_file_obj() - - #content_type, content_string = contents.split(',') - #decoded = base64.b64decode(content_string) - #file_path = io.BytesIO(decoded) - DataOps.extract_and_load_dataset_metadata() - df = DataOps.dataset_metadata_df.copy() - DataOps.unload_file_obj() - - # TODO: allow selection of instrument folder - - instrument_list = [{"label": instFolder, "value": instFolder} for instFolder in df['parent_instrument'].unique()] - - # Create list of file names in dict format for the first instFolder - instFolderName = df['parent_instrument'].unique()[0] - instFolderFileList = list(df.loc[df['parent_instrument']==instFolderName,'parent_file'].to_numpy()) - - #file_list = [{"label": fileName, "value": fileName} for fileName in child_files] - - #fig, channel_names = data_flagging_utils.create_loaded_file_figure(path_to_file, instfolder) - - data['data_loaded_flag'] = True - data['path_to_uploaded_file'] = path_to_file - data['dataset_metadata_table'] = {}# df.to_dict() - data[instFolderName] = instFolderFileList - - - data['instFolder'] = instFolderName - #data['channel_names'] = channel_names - - - - - - return data, instrument_list, False - - except Exception as e: - - DataOps.unload_file_obj() - print(f"Error processing file: {e}") - return data, [], False - - - return data, [], False - -@app.callback( - Output("file-dropdown", "options"), - Output("file-dropdown", "disabled"), - Input("instrument-dropdown", "value"), - State('memory-output','data'), - prevent_initial_call=True -) -def update_file_dropdown(instFolderName, data): - - - # Verify if dataset_metadata from uploaded HDF5 file was loaded correctly - if not all([instFolderName, data]): - return [], False - - if not 'dataset_metadata_table' in data.keys(): - return [], False - - - file_list = [] - # Get files in instFolder - 
instFolderFileList = data.get(instFolderName,[]) - - # Otherwise, if there is no precomputed file list associated with a instFolder, compute that from dataset_metadata - if instFolderFileList: - file_list = [{"label": fileName, "value": fileName} for fileName in instFolderFileList] - else: - path_to_file = data['path_to_uploaded_file'] - DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file) - DataOps.load_file_obj() - - #content_type, content_string = contents.split(',') - #decoded = base64.b64decode(content_string) - #file_path = io.BytesIO(decoded) - DataOps.extract_and_load_dataset_metadata() - tmp = DataOps.dataset_metadata_df.copy() - DataOps.unload_file_obj() - - instFolderFileList = tmp.loc[tmp['parent_instrument']==instFolderName,'parent_file'].to_numpy() - file_list = [{"label": fileName, "value": fileName} for fileName in instFolderFileList] - - return file_list, False - -@app.callback( - Output("sub-dropdown", "options"), - Output("sub-dropdown", "disabled"), - Output("sub-dropdown", "value"), - Input("instrument-dropdown", "value"), - Input("file-dropdown", "value"), - State('memory-output','data'), - prevent_initial_call=True, -) -def update_variable_dropdown(instFolderName, fileName, data): - - - # Verify if dataset_metadata from uploaded HDF5 file was loaded correctly - #if not isinstance(data,dict): - # return [], False - - if not all([instFolderName, fileName, data]): - return [], False, [] - - - #file_list = [] - # Get files in instFolder - #instFolderFileList = data.get(instFolderName,[]) - - # Otherwise, if there is no precomputed file list associated with a instFolder, compute that from dataset_metadata - try: - path_to_file = data['path_to_uploaded_file'] - DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file) - DataOps.load_file_obj() - - dataset_name = '/'.join([instFolderName,fileName,'data_table']) - # Get attributes for data table - datetime_var, datetime_var_format = DataOps.infer_datetime_variable(dataset_name) - metadata_dict = 
DataOps.get_metadata(dataset_name) - - #content_type, content_string = contents.split(',') - #decoded = base64.b64decode(content_string) - #file_path = io.BytesIO(decoded) - #DataOps.extract_and_load_dataset_metadata() - #tmp = DataOps.dataset_metadata_df.copy() - #DataOps.unload_file_obj() - - - - #instFolderFileList = tmp.loc[tmp['parent_instrument']==instFolderName,'parent_file'].to_numpy() - variableList = [] - for var_name in metadata_dict.keys(): - if var_name != datetime_var: - variableList.append(var_name) - - DataOps.unload_file_obj() - except Exception as e: - DataOps.unload_file_obj() - print(f"Error processing dataset_name: {e}") - return [], False, [] - - return [{"label": var_name, "value": var_name} for var_name in variableList] , False, variableList - -@app.callback( - Output('timeseries-plot', 'figure'), - Output('memory-output','data'), - Input('instrument-dropdown', 'value'), - Input('file-dropdown', 'value'), - Input('sub-dropdown', 'value'), - Input('memory-output', 'data'), - prevent_initial_call=True -) -def update_figure(instFolderName, fileName, variableList, data): - # Check if any input is None or empty - if not all([instFolderName, fileName, variableList, data]): - return go.Figure(), dash.no_update # Return an empty figure to prevent crashes - - path_to_file = data.get('path_to_uploaded_file') - if not path_to_file: - return go.Figure(), dash.no_update - - DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file) - DataOps.load_file_obj() - dataset_name = '/'.join([instFolderName, fileName, 'data_table']) - - # Get attributes for data table - datetime_var, datetime_var_format = DataOps.infer_datetime_variable(dataset_name) - DataOps.unload_file_obj() - - fig, channel_names = data_flagging_utils.create_loaded_file_figure( - path_to_file, instFolderName, dataset_name, datetime_var, datetime_var_format, variableList - ) - data['channel_names'] = channel_names - return fig, data - - -"""@app.callback( - Output('memory-output','data'), - 
Output('timeseries-plot', 'figure'), - Output("instrument-dropdown", "options"), - Output("instrument-dropdown", "disabled"), - [Input('upload-image','filename')], - [Input('upload-image','contents')] -) -def load_data(filename, contents): - data = {'data_loaded_flag': False} - if filename and contents and filename.endswith('.h5'): - - try: - path_to_file = data_flagging_utils.save_file(filename,contents) - - DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file) - DataOps.load_file_obj() - - #content_type, content_string = contents.split(',') - #decoded = base64.b64decode(content_string) - #file_path = io.BytesIO(decoded) - DataOps.extract_and_load_dataset_metadata() - df = DataOps.dataset_metadata_df.copy() - # TODO: allow selection of instrument folder - instfolder = df['parent_instrument'].unique()[0] - instrument_list = [{"label": instFolder, "value": instFolder} for instFolder in df['parent_instrument'].unique()] - - #fig, channel_names = data_flagging_utils.create_loaded_file_figure(path_to_file, instfolder) - - data['data_loaded_flag'] = True - data['path_to_uploaded_file'] = path_to_file - data['instfolder'] = instfolder - #data['channel_names'] = channel_names - - DataOps.unload_file_obj() - - - - return data, dash.no_update, instrument_list, False - - except Exception as e: - - DataOps.unload_file_obj() - print(f"Error processing file: {e}") - return data, dash.no_update, instrument_list, False - - - return data, dash.no_update, [], False""" - -@app.callback( - Output('timeseries-plot', 'figure', allow_duplicate=True), - Output('flag-mode-title','children'), - Input('flag-button', 'n_clicks'), - State('timeseries-plot', 'figure'), - State('memory-output', 'data'), - prevent_initial_call=True, -) -def create_flag(n_clicks, fig, data): - #if not data or not data.get('data_loaded_flag', False): - - if not all([n_clicks, fig, data]): - return dash.no_update, dash.no_update - - fig['layout'].update({'dragmode' : 'select', - 'activeselection' : 
dict(fillcolor='yellow'), - 'doubleClick' : 'reset' - }) - - #fig['layout'].update({'title':"Flagging Mode Enabled: Select ROI to Define Flagging Interval."}) - - #value = '{} amigos'.format(n_clicks) - title = "Flagging Mode Enabled: Select ROI to Define Flagging Interval." - return fig, title - #return fig - -#@app.callback( -# Output('timeseries-plot', 'figure', allow_duplicate=True), -# Output('timeseries-plot', 'selectedData', allow_duplicate=True), -# #Output('textarea-example-output','children'), -# Input('reset-flag-button', 'n_clicks'), -# State('timeseries-plot', 'figure'), -# #State('memory-output', 'data'), -# prevent_initial_call=True -#) -#def clear_flag(n_clicks, fig): - #if not data or not data.get('data_loaded_flag', False): - # return dash.no_update, dash.no_update - -# fig['layout'].update({'dragmode': 'zoom', 'activeselection': None}) - #fig.update_layout() - #update_layout(dragmode='select', activeselection=dict(fillcolor='yellow')) - - #shapes = [] - #if relayoutData and 'xaxis.range[0]' in relayoutData: - # start = relayoutData['xaxis.range[0]'] - # end = relayoutData['xaxis.range[1]'] - #else: - # start, end = None, None - - #if start and end: - # shapes.append({ - # 'type': 'rect', - # 'xref': 'x', - # 'yref': 'paper', - # 'x0': start, - # 'y0': 0, - # 'x1': end, - # 'y1': 1, - # 'fillcolor': 'rgba(128, 0, 128, 0.3)', - # 'line': {'width': 0} - # }) - # fig['layout'].update(shapes=shapes) - - #value = '{} amigos'.format(n_clicks) -# return fig, None #, f'You have entered: \n{value}' - -@app.callback( - [Output('timeseries-plot', 'selectedData'), - Output('timeseries-plot', 'figure', allow_duplicate=True), - Output('flag-mode-title', 'children',allow_duplicate=True)], - [Input('reset-flag-button', 'n_clicks'), - State('timeseries-plot', 'figure'), - State('memory-output', 'data')], - prevent_initial_call = True) -def clear_flag(n_clicks, fig, data): - - if n_clicks > 0 and data.get('data_loaded_flag', False): - # Clear selection - 
selected_data = None - fig['layout'].update({'dragmode': 'zoom', 'activeselection': None, - 'selections':{'line': None}}) - instFolder =data['instFolder'] - fig['layout'].update({'title': f'{instFolder}: Target and Diagnostic Channels'}) - flagging_mode_message = '' - return selected_data, fig, flagging_mode_message - else: - return dash.no_update, dash.no_update, dash.no_update - -@app.callback( - [Output('timeseries-plot', 'figure', allow_duplicate=True), - Output('timeseries-plot', 'selectedData',allow_duplicate=True), - Output('flag-mode-title', 'children',allow_duplicate=True)], - [Input('timeseries-plot', 'relayoutData'), - State('timeseries-plot', 'figure'), - State('memory-output', 'data')], - prevent_initial_call = True) -def clear_flag_mode_title(relayoutData, fig, data): - if not all([relayoutData, fig, data]): - return dash.no_update, dash.no_update, dash.no_update - - if data.get('data_loaded_flag', False) and not fig['layout'].get('dragmode',None) == 'select': - # Clear selection - selected_data = None - fig['layout'].update({'dragmode': 'zoom', 'activeselection': None, - 'selections':{'line': None}}) - #instFolder =data['instfolder'] - #fig['layout'].update({'title': f'{instFolder}: Target and Diagnostic Channels'}) - flagging_mode_message = '' - return fig, selected_data, flagging_mode_message - else: - return dash.no_update, dash.no_update, dash.no_update - -def extract_number(s): - return int(s[1:])-1 if s[1:].isdigit() else 0 - -@callback(Output('tbl', 'data'), - Input('commit-flag-button','n_clicks'), - State('flag-options','value'), - State('timeseries-plot','selectedData'), - State('memory-output', 'data'), - prevent_initial_call=True) -def commit_flag(n_clicks,flag_value,selected_Data, data): - - value = selected_Data - if (selected_Data is None) and (not isinstance(selected_Data,dict)): - return [] - elif not selected_Data.get('range',[]): # verify if there is a flag's time interval to commit - return [] - - # TODO: modify the name 
path/to/name to reflect the directory provenance - instFolder = data['instFolder'] - filePath = data['path_to_uploaded_file'] - - # Modified version (appending "_flags" to the first folder) - flagFolder = instFolder.split('/') - flagFolder[0] = f"{flagFolder[0]}_flags" # Modify first folder - flagFolder = '/'.join(flagFolder) - - flagfolderpath = os.path.join(os.path.splitext(data['path_to_uploaded_file'])[0], flagFolder) - flagfolderpath = os.path.normpath(flagfolderpath) - - #print("Without modification:", flagfolderpath_original) - print("With modification:", flagfolderpath) - - if not os.path.isdir(flagfolderpath): - os.makedirs(flagfolderpath) - - #dirlist = os.listdir(flagfolderpath) - # Get all files in the directory with their full paths - files = [os.path.join(flagfolderpath, f) for f in os.listdir(flagfolderpath)] - - # Sort files by creation time - dirlist_sorted_by_creation = sorted(files, key=os.path.getctime) - - #dirlist = dirlist.sort(key=lambda x: int(x.split('_')[1].split('.')[0])) - - display_flag_registry = True - if not display_flag_registry: - tableData = [] - else: - tableData = data_flagging_utils.load_flags(flagfolderpath) - - #tableData = [] - #for pathtofile in dirlist_sorted_by_creation: - # if '.json' in pathtofile: - # with open(pathtofile,'r') as f: - # tableData.append(json.load(f)) - - number_of_existing_flags = len(dirlist_sorted_by_creation) - flagid = number_of_existing_flags+1 - - - #if not os.path.exists(flag_filename): - # with open(flag_filename,'r') as open_flagsfile: - # json_flagsobject = json.load(open_flagsfile) - # data = [json_flagsobject[key] for key in json_flagsobject.keys()] - - - #return f'You have entered: \n{value}' - channel_names = data.get('channel_names', []) - for key, value in selected_Data['range'].items(): - if 'x' in key: - new_row = {'id':flagid,'startdate':value[0],'enddate':value[1],'flag_code': flag_value} - new_row.update(data_flagging_utils.flags_dict.get(flag_value,{})) - if channel_names: - 
channel_pos = extract_number(key) - parent_channel, parent_dataset = tuple(channel_names[channel_pos].split(',')) - new_row.update({'parent_ch_pos': str(channel_pos), 'parent_channel':parent_channel, 'parent_dataset': parent_dataset}) - - tableData.append(new_row) - #data = [{'startdate':value[0],'enddate':value[1],'value':90}] - - flag_filename = os.path.join(flagfolderpath,f'flag_{flagid}_{parent_channel}.json') - if not os.path.exists(flag_filename): - with open(flag_filename,'w') as flagsfile: - #json_flagsobject = json.dump({'row'+str(len(data)): new_row}, flagsfile) - json.dump(new_row, flagsfile) - #else: - # with open(flag_filename,'a') as flagsfile: - # json.dump(new_row, flagsfile) - #json.dump({'row'+str(len(data)): new_row}, flagsfile) - #data = [json_flagsobject[key] for key in json_flagsobject.keys()] - - return tableData - -#@callback(Output('memory-output','data',allow_duplicate=True), -# [Input('enable-flag-checkbox', 'value'), State('memory-output','data')], -# prevent_initial_call=True) - #[Input('tbl','active_cell'), Input('enable-flag-checkbox', 'value') State('timeseries-plot', 'figure'), State('tbl','data')],) -#def enable_flag_visualization(value, memory): -# if isinstance(memory,dict): -# memory.update({'vis_enabled' : value}) - -# return memory - -# return dash.no_update - -@callback(Output('timeseries-plot', 'figure',allow_duplicate=True), - [Input('enable-flag-checkbox', 'value'), State('timeseries-plot', 'figure')], - prevent_initial_call = True) -def clear_flags_from_figure(value, figure): - - vis_enabled = value[0] if value and isinstance(value, list) else False - - if not vis_enabled and figure: - shapes = figure.get('layout', {}).get('shapes', []) - - if shapes: # If there are shapes in the figure, clear them - new_figure = figure.copy() # Create a copy to avoid mutation - new_figure['layout']['shapes'] = [] - return new_figure - - return dash.no_update - - -@callback(Output('timeseries-plot', 'figure',allow_duplicate=True), - 
[Input('tbl','active_cell'), - State('enable-flag-checkbox', 'value'), State('timeseries-plot', 'figure'), State('tbl','data')], - prevent_initial_call = True) -def visualize_flag_on_figure(active_cell, value, figure, data): - - if value: - vis_enabled = value[0] - else: - vis_enabled = False - - - if active_cell and vis_enabled: - row = active_cell['row'] - startdate = data[row]['startdate'] - enddate = data[row]['enddate'] - parent_ch_pos = data[row].get('parent_ch_pos',None) - - if parent_ch_pos != None: - # Ensure that startdate and enddate are parsed correctly - #startdate = pd.to_datetime(startdate) - #enddate = pd.to_datetime(enddate) - - # Determine y-axis range directly from layout - yaxis_key = f"yaxis{int(parent_ch_pos) + 1}" if int(parent_ch_pos) > 0 else "yaxis" - xaxis_key = f"xaxis{int(parent_ch_pos) + 1}" if int(parent_ch_pos) > 0 else "xaxis" - #y_min = figure['layout'].get(yaxis_key, {}).get('range', [0, 1])[0] - #y_max = figure['layout'].get(yaxis_key, {}).get('range', [0, 1])[1] - - # Add a vertical region to the specified subplot - figure['layout']['shapes'] = figure['layout'].get('shapes', []) + [ - dict( - type="rect", - xref=xaxis_key.replace('axis', ''), - yref=yaxis_key.replace('axis', ''), - x0=startdate, - x1=enddate, - y0=figure['layout'][yaxis_key]['range'][0], - y1=figure['layout'][yaxis_key]['range'][1], - line=dict(color="rgba(50, 171, 96, 1)", width=2), - fillcolor="rgba(50, 171, 96, 0.3)", - ) - ] - return figure - - return dash.no_update - -@callback(Output('tbl', 'data',allow_duplicate=True), - [Input('load-flags-button','n_clicks'),State('enable-flag-checkbox', 'value'),State('memory-output', 'data')], - prevent_initial_call = True) -def visualize_flags_on_table(n_clicks,value,memoryData): - - - instFolder = memoryData.get('instfolder', '') - filePath = memoryData.get('path_to_uploaded_file', '') - - #flagfolderpath = os.path.join(os.path.splitext(memoryData['path_to_uploaded_file'])[0],f'{instfolder}_flags') - - if not 
filePath: - return dash.no_update - - - - - #flagfolderpath = os.path.join(os.path.splitext(memoryData['path_to_uploaded_file'])[0],f'{instfolder}_flags') - ## Return no table update if there is no flags folder - #if not os.path.exists(flagfolderpath): - # return dash.no_update - - #files = [os.path.join(flagfolderpath, f) for f in os.listdir(flagfolderpath)] - - vis_enabled = value[0] if value and isinstance(value, list) else False - - if n_clicks > 0 and vis_enabled: # and len(files) > 0: - - tableData = data_flagging_utils.load_flags(filePath, instFolder) - - if not tableData: - return dash.no_update - else: - return tableData - - # # Sort files by creation time - # dirlist_sorted_by_creation = sorted(files, key=os.path.getctime) - # tableData = [] - # for pathtofile in dirlist_sorted_by_creation: - # if '.json' in pathtofile: - # try: - # with open(pathtofile,'r') as f: - # tableData.append(json.load(f)) - # except (json.JSONDecodeError, FileNotFoundError) as e: - # print(e) - # continue # Skip invalid or missing files - - # return tableData - - return dash.no_update - - -def open_browser(): - """Wait for the server to start, then open the browser.""" - sleep(1) # Wait briefly to ensure the server is starting - webbrowser.open_new("http://127.0.0.1:8050/") - -if __name__ == '__main__': - # Start the browser-opening function in a separate thread - threading.Thread(target=open_browser).start() - - # Run the Dash app server +import sys, os + +try: + thisFilePath = os.path.abspath(__file__) + print(thisFilePath) +except NameError: + print("[Notice] The __file__ attribute is unavailable in this environment (e.g., Jupyter or IDLE).") + print("When using a terminal, make sure the working directory is set to the script's location to prevent path issues (for the DIMA submodule)") + #print("Otherwise, path to submodule DIMA may not be resolved properly.") + thisFilePath = os.getcwd() # Use current directory or specify a default + +projectPath = 
os.path.normpath(os.path.join(thisFilePath, "..","..")) + +print(projectPath) + +if not projectPath in sys.path: + sys.path.insert(0,projectPath) + +#print(dimaPath) +import pandas as pd +import numpy as np + +import base64 +import dash +import io + +# Set up project root directory +#root_dir = os.path.abspath(os.curdir) +#sys.path.append(root_dir) +#sys.path.append(os.path.join(root_dir,'dima')) + + +import data_flagging_utils as data_flagging_utils + +from dash import Dash, html, dcc, callback, Output, Input, State, dash_table +import plotly.graph_objs as go +from plotly.subplots import make_subplots +import dash_bootstrap_components as dbc +import json + +import dima.src.hdf5_ops as hdf5_ops +#import dima.instruments.readers.filereader_registry as filereader_registry +#import instruments_.readers.flag_reader as flag_reader + +#filereader_registry.file_extensions.append('.json') +#filereader_registry.file_readers.update({'ACSM_TOFWARE_flags_json' : lambda x: flag_reader.read_jsonflag_as_dict(x)}) + +import threading +import webbrowser +from time import sleep + +EnableVisCheckbox = dbc.Col(dbc.Row([dbc.Col(dcc.Checklist( + id='enable-flag-checkbox', + options=[{'label': html.Span('Enable Flag Visualization', style={'font-size': 15, 'padding-left': 10}), 'value': True}], + value=[], + inline=True),width=6), + dbc.Col(dbc.Button("Load Flags", id='load-flags-button', color='primary'),width=4)], + justify="center", align="center"), + width=12) + +FlagVisTable = html.Div(dash_table.DataTable(data=[], + columns=[{"name": i, "id": i} for i in ['id','startdate','enddate','flag_description','parent_ch_pos','parent_channel']], + id='tbl', + style_header={'textAlign': 'center'}, + fixed_rows={'headers': True}, # Fixed table headers + style_table={'height': '1000px'}, # Make table scrollable + style_cell={'textAlign': 'left', 'padding': '10px'}, # Cell styling + ), + style={ + 'background-color': '#f0f0f0', # Background color for the table + #'height': '1000px', # Match the 
table's height + 'padding': '5px', # Optional padding around the table + 'border': '1px solid #ccc', # Optional border around the background + } ) + +ReviewOpsPannel = dbc.Col([ + # Row 1 + dbc.Row([html.H2("Flagging workflow pannel", style={'font-size': 20})]), + + + # Row 2 + dbc.Row([ + #dbc.Col(html.Div("Review Status"), width=6), + dcc.Checklist( + id='flag-review-status-checklist', + options=[ + {'label': [html.Span("Verify Flags", style={'font-size': 15, 'padding-left': 2})], 'value': 'will review'}, + {'label': [html.Span("Ready to Record Flags", style={'font-size': 15, 'padding-left': 2})], 'value': 'will transfer'}, + {'label': [html.Span("Finalize Flagging", style={'font-size': 15, 'padding-left': 2})], 'value': 'will apply'} + ], + value=[], + #inline=True, + style={ + "display": "flex", # Flexbox for left alignment + "flexDirection": "column", # Arrange the items vertically + "alignItems": "flex-start" # Align the items to the left + } + ), + ]), + + # Row 3 + dbc.Row([ + #dbc.Col(dbc.Button("Load Flags", id='button-1', color='primary'),width=4), + dbc.Col(dbc.Button("Delete Flag", id='delete-flag-button', color='primary'),width=4), + dbc.Col(dbc.Button("Record Flags", id='button-2', color='primary'),width=4), + dbc.Col(dbc.Button("Apply Flags", id='button-3', color='primary'),width=4)], + justify="center", align="center"), + + # Row 4 + #dbc.Row([ + # dbc.Col(html.Div("Apply Flags"), width=6), + # dbc.Col(dbc.Button("Button 2", id='button-2', color='secondary'), width=6), + #]), + ],width=12) + +# Initialize Dash app with Bootstrap theme +app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) + +#df = pd.DataFrame.empty() + +app.layout = dbc.Container([ + html.Div(children=[ + html.Div(children=[ + html.H1('QC/QA Data Flagging App'), + html.H6('All measurements are assumed valid unless checked otherwise.') + ] + )],style={'textAlign': 'center'}), + dbc.Row([ + + dbc.Col([ + dcc.Upload( + id='upload-image', + children=html.Div(['Drag 
and Drop or ',html.A('Select Files')]), + style={ + 'fontSize': "16px", + 'width': '100%', + 'height': '60px', + 'lineHeight': '60px', + 'borderWidth': '1px', + 'borderStyle': 'dashed', + 'borderRadius': '5px', + 'textAlign': 'center', + 'margin': '10px' + }), + dcc.Dropdown( + id='flag-options', + options= data_flagging_utils.filter_flags_by_label(data_flagging_utils.flags_dict,'I'), # displays only flags to invalidate + )], + width=12 + ), + #],justify="center", align="center"), + + #dbc.Row([ + dbc.Col([dbc.Button('Create Flag', id='flag-button', color="primary", className="mt-2")],width=2), + dbc.Col([dbc.Button('Reset Flag', id='reset-flag-button', color="secondary", className="mt-2")],width=2), + dbc.Col([dbc.Button('Commit Flag', id='commit-flag-button', color="secondary", className="mt-2")],width=2) + ], justify="center", align="center",style={'background-color': '#f8f9fa', 'padding': '20px', 'text-align': 'center'}), + + dbc.Row([ + html.H3("Instrument Dashboard"), + + # First Dropdown (Instrument Folders) + dcc.Dropdown( + id="instrument-dropdown", + options=[{"label": i, "value": i} for i in []], + placeholder="Select an Instrument Folder", + ), + + # Spinner wrapping the second and third dropdowns + dcc.Loading( + type="circle", # Spinner style + children=[ + # Second Dropdown (Files) + dcc.Dropdown( + id="file-dropdown", + placeholder="Select a File", + disabled=True # Initially disabled + ), + + # Third Dropdown (Sub-selection) + dcc.Dropdown( + id="sub-dropdown", + placeholder="Select Variables", + multi = True, + disabled=True + ) + ] + ) + ], + justify="center", align="center",style={'background-color': '#f8f9fa', 'padding': '20px', 'text-align': 'center'}), + + dbc.Row([ + dbc.Col([ + html.Div([ + html.Div(id='flag-mode-title', style={'whiteSpace': 'pre-line'}), + dcc.Loading( + type="circle", # Spinner style + children=[ + dcc.Graph(id='timeseries-plot', + style={'height': '1200px','width' : '100%'})]) + ], + style={'height': '1000px', 
@app.callback(
    Output('memory-output', 'data', allow_duplicate=True),
    Output("instrument-dropdown", "options"),
    Output("instrument-dropdown", "disabled"),
    [Input('upload-image', 'filename'),
     Input('upload-image', 'contents')],
    prevent_initial_call=True
)
def load_data(filename, contents):
    """Handle an uploaded .h5 file.

    Saves the upload, reads its dataset metadata, and returns
    (memory-store dict, instrument dropdown options, dropdown-disabled flag).
    Only files ending in '.h5' are processed; anything else yields an empty
    option list and an unloaded store.
    """
    data = {'data_loaded_flag': False}
    if not (filename and contents and filename.endswith('.h5')):
        return data, [], False

    DataOps = None  # BUGFIX: guard — save_file may raise before DataOps is bound
    try:
        path_to_file = data_flagging_utils.save_file(filename, contents)

        DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file)
        DataOps.load_file_obj()
        DataOps.extract_and_load_dataset_metadata()
        df = DataOps.dataset_metadata_df.copy()
        DataOps.unload_file_obj()

        # TODO: allow selection of instrument folder
        instrument_list = [{"label": instFolder, "value": instFolder}
                           for instFolder in df['parent_instrument'].unique()]

        # Precompute the file list for the first instrument folder only;
        # update_file_dropdown computes lists for the others on demand.
        instFolderName = df['parent_instrument'].unique()[0]
        instFolderFileList = list(
            df.loc[df['parent_instrument'] == instFolderName, 'parent_file'].to_numpy())

        data['data_loaded_flag'] = True
        data['path_to_uploaded_file'] = path_to_file
        data['dataset_metadata_table'] = {}  # df.to_dict()
        data[instFolderName] = instFolderFileList
        data['instFolder'] = instFolderName

        return data, instrument_list, False

    except Exception as e:
        # Original code called unload unconditionally, which raised NameError
        # when save_file itself failed before DataOps was created.
        if DataOps is not None:
            DataOps.unload_file_obj()
        print(f"Error processing file: {e}")
        return data, [], False
@app.callback(
    Output("sub-dropdown", "options"),
    Output("sub-dropdown", "disabled"),
    Output("sub-dropdown", "value"),
    Input("instrument-dropdown", "value"),
    Input("file-dropdown", "value"),
    State('memory-output', 'data'),
    prevent_initial_call=True,
)
def update_variable_dropdown(instFolderName, fileName, data):
    """Populate the variable dropdown for the selected instrument/file.

    Reads the dataset's metadata from the uploaded HDF5 file and offers every
    variable except the inferred datetime column. Returns
    (options, disabled flag, preselected values).
    """
    if not all([instFolderName, fileName, data]):
        return [], False, []

    DataOps = None  # BUGFIX: guard — KeyError on data lookup could occur before binding
    try:
        path_to_file = data['path_to_uploaded_file']
        DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file)
        DataOps.load_file_obj()

        dataset_name = '/'.join([instFolderName, fileName, 'data_table'])
        # Attributes of the data table: datetime column and its format.
        datetime_var, datetime_var_format = DataOps.infer_datetime_variable(dataset_name)
        metadata_dict = DataOps.get_metadata(dataset_name)

        # Offer every variable except the datetime column itself.
        variableList = [var_name for var_name in metadata_dict.keys()
                        if var_name != datetime_var]

        DataOps.unload_file_obj()
    except Exception as e:
        if DataOps is not None:
            DataOps.unload_file_obj()
        print(f"Error processing dataset_name: {e}")
        return [], False, []

    return [{"label": var_name, "value": var_name} for var_name in variableList], False, variableList

@app.callback(
    Output('timeseries-plot', 'figure'),
    Output('memory-output', 'data'),
    Input('instrument-dropdown', 'value'),
    Input('file-dropdown', 'value'),
    Input('sub-dropdown', 'value'),
    Input('memory-output', 'data'),
    prevent_initial_call=True
)
def update_figure(instFolderName, fileName, variableList, data):
    """Rebuild the time-series figure for the selected instrument/file/variables.

    Returns an empty figure (and leaves the store untouched) when any selection
    is missing, to prevent crashes while the user is still choosing.
    """
    if not all([instFolderName, fileName, variableList, data]):
        return go.Figure(), dash.no_update

    path_to_file = data.get('path_to_uploaded_file')
    if not path_to_file:
        return go.Figure(), dash.no_update

    DataOps = hdf5_ops.HDF5DataOpsManager(path_to_file)
    DataOps.load_file_obj()
    dataset_name = '/'.join([instFolderName, fileName, 'data_table'])

    # Attributes of the data table: datetime column and its format.
    datetime_var, datetime_var_format = DataOps.infer_datetime_variable(dataset_name)
    DataOps.unload_file_obj()

    fig, channel_names = data_flagging_utils.create_loaded_file_figure(
        path_to_file, instFolderName, dataset_name, datetime_var, datetime_var_format, variableList
    )
    # Remember which channel sits in which subplot for later flag attribution.
    data['channel_names'] = channel_names
    return fig, data
@app.callback(
    Output('timeseries-plot', 'figure', allow_duplicate=True),
    Output('flag-mode-title', 'children'),
    Input('flag-button', 'n_clicks'),
    State('timeseries-plot', 'figure'),
    State('memory-output', 'data'),
    prevent_initial_call=True,
)
def create_flag(n_clicks, fig, data):
    """Switch the plot into rectangular-selection mode so the user can mark
    a flagging interval, and show an instruction banner."""
    if not all([n_clicks, fig, data]):
        return dash.no_update, dash.no_update

    fig['layout'].update({'dragmode': 'select',
                          'activeselection': dict(fillcolor='yellow'),
                          'doubleClick': 'reset'})

    title = "Flagging Mode Enabled: Select ROI to Define Flagging Interval."
    return fig, title

@app.callback(
    [Output('timeseries-plot', 'selectedData'),
     Output('timeseries-plot', 'figure', allow_duplicate=True),
     Output('flag-mode-title', 'children', allow_duplicate=True)],
    [Input('reset-flag-button', 'n_clicks'),
     State('timeseries-plot', 'figure'),
     State('memory-output', 'data')],
    prevent_initial_call=True)
def clear_flag(n_clicks, fig, data):
    """Leave flagging mode: drop the current selection, restore zoom mode,
    reset the plot title, and clear the instruction banner."""
    # BUGFIX: `data` is None before any file is uploaded; the original
    # `data.get(...)` would raise AttributeError in that case.
    if n_clicks and data and data.get('data_loaded_flag', False):
        # Clear selection
        selected_data = None
        fig['layout'].update({'dragmode': 'zoom', 'activeselection': None,
                              'selections': {'line': None}})
        instFolder = data['instFolder']
        fig['layout'].update({'title': f'{instFolder}: Target and Diagnostic Channels'})
        flagging_mode_message = ''
        return selected_data, fig, flagging_mode_message
    return dash.no_update, dash.no_update, dash.no_update
@app.callback(
    [Output('timeseries-plot', 'figure', allow_duplicate=True),
     Output('timeseries-plot', 'selectedData', allow_duplicate=True),
     Output('flag-mode-title', 'children', allow_duplicate=True)],
    [Input('timeseries-plot', 'relayoutData'),
     State('timeseries-plot', 'figure'),
     State('memory-output', 'data')],
    prevent_initial_call=True)
def clear_flag_mode_title(relayoutData, fig, data):
    """When the user interacts with the plot outside selection mode, clear any
    stale selection and the flagging-mode banner."""
    if not all([relayoutData, fig, data]):
        return dash.no_update, dash.no_update, dash.no_update

    if data.get('data_loaded_flag', False) and not fig['layout'].get('dragmode', None) == 'select':
        # Clear selection
        selected_data = None
        fig['layout'].update({'dragmode': 'zoom', 'activeselection': None,
                              'selections': {'line': None}})
        flagging_mode_message = ''
        return fig, selected_data, flagging_mode_message
    return dash.no_update, dash.no_update, dash.no_update

def extract_number(s):
    """Return the integer encoded after the first character of *s*
    (e.g. 'y12' -> 12), or 0 when there is none (e.g. 'y' or 'x0a')."""
    return int(s[1:]) if s[1:].isdigit() else 0

@callback(Output('tbl', 'data'),
          Input('commit-flag-button', 'n_clicks'),
          State('flag-options', 'value'),
          State('timeseries-plot', 'selectedData'),
          State('memory-output', 'data'),
          prevent_initial_call=True)
def commit_flag(n_clicks, flag_value, selected_Data, data):
    """Persist the currently selected time interval(s) as JSON flag files in
    the instrument's flags folder and return the refreshed flag table rows."""
    # BUGFIX: the original guard used `and`, letting non-dict, non-None values
    # through to the `.get` call below.
    if selected_Data is None or not isinstance(selected_Data, dict):
        return []
    if not selected_Data.get('range', []):  # verify there is an interval to commit
        return []

    # TODO: modify the name path/to/name to reflect the directory provenance
    instFolder = data['instFolder']

    # Append "_flags" to the first folder segment of the instrument path.
    flagFolder = instFolder.split('/')
    flagFolder[0] = f"{flagFolder[0]}_flags"
    flagFolder = '/'.join(flagFolder)

    flagfolderpath = os.path.join(os.path.splitext(data['path_to_uploaded_file'])[0], flagFolder)
    flagfolderpath = os.path.normpath(flagfolderpath)
    print("With modification:", flagfolderpath)

    if not os.path.isdir(flagfolderpath):
        os.makedirs(flagfolderpath)

    # Existing flag files, oldest first; their count determines the next flag id.
    files = [os.path.join(flagfolderpath, f) for f in os.listdir(flagfolderpath)]
    dirlist_sorted_by_creation = sorted(files, key=os.path.getctime)

    # BUGFIX: load_flags returns None for an empty folder; `or []` keeps
    # tableData a list so the appends below cannot fail.
    tableData = data_flagging_utils.load_flags(flagfolderpath) or []

    number_of_existing_flags = len(dirlist_sorted_by_creation)
    flagid = number_of_existing_flags + 1

    channel_names = data.get('channel_names', [])
    for key, value in selected_Data['range'].items():
        if 'x' in key:
            new_row = {'id': flagid, 'startdate': value[0], 'enddate': value[1], 'flag_code': flag_value}
            new_row.update(data_flagging_utils.flags_dict.get(flag_value, {}))
            parent_channel = ''  # BUGFIX: previously unbound when channel_names was empty
            if channel_names:
                channel_pos = extract_number(key)
                parent_channel, parent_dataset = tuple(channel_names[channel_pos].split(','))
                new_row.update({'parent_ch_pos': str(channel_pos), 'parent_channel': parent_channel, 'parent_dataset': parent_dataset})

            tableData.append(new_row)

            flag_filename = os.path.join(flagfolderpath, f'flag_{flagid}_{parent_channel}.json')
            if not os.path.exists(flag_filename):
                with open(flag_filename, 'w') as flagsfile:
                    json.dump(new_row, flagsfile)

    return tableData
@callback(Output('timeseries-plot', 'figure', allow_duplicate=True),
          [Input('enable-flag-checkbox', 'value'), State('timeseries-plot', 'figure')],
          prevent_initial_call=True)
def clear_flags_from_figure(value, figure):
    """Remove flag-region shapes from the figure when flag visualization
    is switched off."""
    vis_enabled = value[0] if value and isinstance(value, list) else False

    if not vis_enabled and figure:
        shapes = figure.get('layout', {}).get('shapes', [])
        if shapes:  # If there are shapes in the figure, clear them
            new_figure = figure.copy()  # Create a copy to avoid mutation
            new_figure['layout']['shapes'] = []
            return new_figure

    return dash.no_update

@callback(Output('timeseries-plot', 'figure', allow_duplicate=True),
          [Input('tbl', 'active_cell'),
           State('enable-flag-checkbox', 'value'), State('timeseries-plot', 'figure'), State('tbl', 'data')],
          prevent_initial_call=True)
def visualize_flag_on_figure(active_cell, value, figure, data):
    """Highlight the flag selected in the table as a shaded rectangle on the
    subplot the flag belongs to (identified via parent_ch_pos)."""
    vis_enabled = value[0] if value else False

    if active_cell and vis_enabled:
        row = active_cell['row']
        startdate = data[row]['startdate']
        enddate = data[row]['enddate']
        parent_ch_pos = data[row].get('parent_ch_pos', None)

        if parent_ch_pos is not None:
            # Each subplot has its own axis pair: yaxis/xaxis for position 0,
            # yaxis2/xaxis2 for position 1, and so on.
            yaxis_key = f"yaxis{int(parent_ch_pos) + 1}" if int(parent_ch_pos) > 0 else "yaxis"
            xaxis_key = f"xaxis{int(parent_ch_pos) + 1}" if int(parent_ch_pos) > 0 else "xaxis"

            # Add a vertical region spanning the flagged interval.
            figure['layout']['shapes'] = figure['layout'].get('shapes', []) + [
                dict(
                    type="rect",
                    xref=xaxis_key.replace('axis', ''),
                    yref=yaxis_key.replace('axis', ''),
                    x0=startdate,
                    x1=enddate,
                    y0=figure['layout'][yaxis_key]['range'][0],
                    y1=figure['layout'][yaxis_key]['range'][1],
                    line=dict(color="rgba(50, 171, 96, 1)", width=2),
                    fillcolor="rgba(50, 171, 96, 0.3)",
                )
            ]
            return figure

    return dash.no_update

@callback(Output('tbl', 'data', allow_duplicate=True),
          [Input('load-flags-button', 'n_clicks'), State('enable-flag-checkbox', 'value'), State('memory-output', 'data')],
          prevent_initial_call=True)
def visualize_flags_on_table(n_clicks, value, memoryData):
    """Load previously recorded flags from the flags folder and display them
    in the table (only when visualization is enabled)."""
    # BUGFIX: load_data stores the key as 'instFolder'; the original read
    # 'instfolder' and always got ''. Accept both for safety.
    instFolder = memoryData.get('instFolder', memoryData.get('instfolder', ''))
    filePath = memoryData.get('path_to_uploaded_file', '')

    if not filePath:
        return dash.no_update

    vis_enabled = value[0] if value and isinstance(value, list) else False

    if n_clicks > 0 and vis_enabled:
        # BUGFIX: load_flags(flagFolderPath, dry_run) takes the flags folder,
        # not (filePath, instFolder). Build the folder the same way commit_flag
        # does: '<h5 basename>/<first segment>_flags/...'.
        flagFolder = instFolder.split('/')
        flagFolder[0] = f"{flagFolder[0]}_flags"
        flagfolderpath = os.path.normpath(
            os.path.join(os.path.splitext(filePath)[0], '/'.join(flagFolder)))

        tableData = data_flagging_utils.load_flags(flagfolderpath)

        if not tableData:
            return dash.no_update
        return tableData

    return dash.no_update

def open_browser():
    """Wait for the server to start, then open the browser."""
    sleep(1)  # Wait briefly to ensure the server is starting
    webbrowser.open_new("http://127.0.0.1:8050/")

if __name__ == '__main__':
    # Start the browser-opening function in a separate thread
    threading.Thread(target=open_browser).start()

    # Run the Dash app server
    app.run_server(debug=True, use_reloader=False)
def save_file(name, content):
    """Decode a Dash upload payload and store it under UPLOAD_DIRECTORY.

    *content* is a data-URI string ('<content type>,<base64 payload>').
    An existing file with the same name is never overwritten; the path is
    returned in either case.
    """
    _, encoded_payload = content.split(',')
    raw_bytes = base64.b64decode(encoded_payload)
    file_path = os.path.join(UPLOAD_DIRECTORY, name)
    if os.path.exists(file_path):
        print(f'File already exists at {file_path}.\nTo maintain the integrity of the existing file, it will not be overwritten.')
        return file_path
    with open(file_path, "wb") as out_handle:
        out_handle.write(raw_bytes)
    print(f"File saved successfully at {file_path}")
    return file_path
def filter_flags_by_label(flags_dict, label):
    """
    Select the flags whose label matches the requested one.

    Parameters:
    -----------
    flags_dict (dict): Mapping of flag code -> {'flag_label', 'flag_description'}.
    label (str): The label to filter by ('I' or 'V').

    Returns:
    --------
    list: Dictionaries with 'label' (the description) and 'value' (the code).
    """
    selected = []
    for code, entry in flags_dict.items():
        if entry['flag_label'] == label:
            selected.append({'label': entry['flag_description'], 'value': code})
    return selected
def load_flags(flagFolderPath, dry_run : bool = False):
    """
    Return the list of flags (dictionaries) stored as JSON files in a folder.

    Parameters:
    -----------
    flagFolderPath (str): Path to the directory containing the flag .json files.
    dry_run (bool): If True, performs all operations except loading file contents
        and returns the sorted file paths instead.

    Returns:
    --------
    list: A list of dictionaries containing flag data (or file paths in dry_run
        mode), or None if the folder does not exist or contains no files.
    """
    # Return None if the flags folder does not exist
    if not os.path.exists(flagFolderPath):
        return None

    # List files in the flags folder
    files = [os.path.join(flagFolderPath, f) for f in os.listdir(flagFolderPath)]

    # If no files found, return None
    if not files:
        return None

    # Sort files by creation time so flag ids stay in recording order
    sortedFiles = sorted(files, key=os.path.getctime)

    if dry_run:
        print(f"Dry run: Found {len(sortedFiles)} files in the flags folder:")
        for filePath in sortedFiles:
            print(f" - {filePath}")
        return sortedFiles  # Return file paths in dry run mode

    # Process and load JSON files
    flagDataList = []
    for filePath in sortedFiles:
        if filePath.endswith('.json'):
            try:
                with open(filePath, 'r') as file:
                    flagDataList.append(json.load(file))
            except (json.JSONDecodeError, FileNotFoundError) as e:
                print(f"Error loading file {filePath}: {e}")
                continue  # Skip invalid or missing files

    return flagDataList
class FlaggingAppDataManager():
    """Coordinate flag transfer and application for an HDF5 data file.

    Wraps an HDF5DataOpsManager; callers must invoke load_file_obj() before
    transfer_flags() / apply_flags() and unload_file_obj() when done.
    """

    def __init__(self, file_path, mode = 'r+') -> None:
        self.file_path = file_path
        self.mode = mode
        self._data_ops_obj = None   # lazily created HDF5DataOpsManager
        self.file_obj = None        # underlying h5py file handle, set by load_file_obj
        self.datasets_metadata_df = None
        return None

    def load_file_obj(self):
        """Open the HDF5 file and expose its file object."""
        self._data_ops_obj = h5de.HDF5DataOpsManager(self.file_path, self.mode)
        self._data_ops_obj.load_file_obj()
        self.file_obj = self._data_ops_obj.file_obj

    def unload_file_obj(self):
        """Close the HDF5 file if open.

        BUGFIX: the original always created a *new* manager and unloaded that,
        leaving the originally opened file handle dangling. Reuse the existing
        manager when there is one.
        """
        if self._data_ops_obj is None:
            self._data_ops_obj = h5de.HDF5DataOpsManager(self.file_path, self.mode)
        self._data_ops_obj.unload_file_obj()  # sets the manager's file_obj to None
        self.file_obj = None

    def transfer_flags(self):
        """Append the recorded flag files (stored next to the .h5 file) into the HDF5 file."""
        if self.file_obj is None:
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

        path_to_append_dir, ext = os.path.splitext(self.file_path)
        self._data_ops_obj.update_file(path_to_append_dir)

    def apply_flags(self, instFolder):
        """NaN-out target channels over invalidated intervals and append the
        cleaned dataset under '<first path segment>_cleaned'.

        Raises ValueError when the instrument folder or its required
        'target_channels'/'diagnostic_channels' attributes are missing.
        """
        # TODO: apply flags to diagnostic and individual channels; so far all channels are cleaned
        if self.file_obj is None:
            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")

        DataOpsManager = self._data_ops_obj
        file_obj = self.file_obj

        try:
            if not instFolder in file_obj:
                raise ValueError(f'Invalid instrument name. Instrument folder {instFolder} was not found in file {self.file_path}.')

            # BUGFIX: the original tested membership in `flag_obj`, a name not
            # yet defined at this point; the flags group lives in `file_obj`.
            if '_'.join([instFolder, 'flags']) not in file_obj:
                raise RuntimeWarning(f'There is no flags to apply. ')

            if not ('diagnostic_channels' in file_obj[instFolder].attrs and 'target_channels' in file_obj[instFolder].attrs):
                raise ValueError(
                    f'Required attributes missing. Instrument folder {instFolder} in file {self.file_path} has to be annotated with '
                    'attributes "diagnostic_channels" and "target_channels" that specify channels location and their names.'
                )

            dataset_name = file_obj[instFolder].attrs['target_channels']['location'][0].decode()
            channel_names = file_obj[instFolder].attrs['target_channels']['names'][0].decode().split(',')

            dataset_obj = file_obj[dataset_name]
            # TODO: maybe operate directly on dataset_obj[...] (structured array) instead of a DataFrame
            dataset_df = DataOpsManager.extract_dataset_as_dataframe(dataset_name)

            # By design the first entry of channel_names is the datetime variable.
            datetime_var = channel_names[0]
            remaining_vars = channel_names.copy()
            remaining_vars.remove(datetime_var)

            ref_datetime_format = dataset_obj.attrs.get(datetime_var, None)['datetime_format'][0].decode()

            dataset_df[datetime_var] = dataset_df[datetime_var].apply(lambda x: x.decode())
            dataset_df[datetime_var] = pd.to_datetime(dataset_df[datetime_var], format=ref_datetime_format, errors='coerce')

            # TODO: record this format as part of the flag's attributes during flag recording
            flag_datetime_format = '%Y-%m-%d %H:%M:%S.%f'
            for flag in file_obj[f'{instFolder}_flags']:
                flag_obj = file_obj[f'{instFolder}_flags'][flag]['data_table']

                # BUGFIX: the original `if not ... is 'None'` identity test made
                # flag_label always '', so invalid intervals were never applied.
                if flag_obj['flag_code'][0].decode() == 'None':
                    flag_label = ''
                else:
                    flag_label = flag_obj['flag_label'][0].decode()

                # Replace values by NaN where the flag invalidates the data.
                if flag_label == 'I':
                    t1 = pd.to_datetime(flag_obj['startdate'][0].decode(), format=flag_datetime_format)
                    t2 = pd.to_datetime(flag_obj['enddate'][0].decode(), format=flag_datetime_format)

                    # Nearest-timestamp indices bounding the flagged interval.
                    t1_idx = abs(dataset_df[datetime_var] - t1).argmin()
                    t2_idx = abs(dataset_df[datetime_var] - t2).argmin()

                    dataset_df.loc[t1_idx:t2_idx, remaining_vars] = np.nan

            # Re-encode datetimes; NaT values become empty byte strings.
            dataset_df[datetime_var] = dataset_df[datetime_var].apply(
                lambda x: x.strftime(ref_datetime_format).encode('utf-8') if not pd.isnull(x) else b''
            )

            # Split 'instFolder/fileName/datatable' and rebuild the dataset name
            # relative to a new '<instFolder>_cleaned' destination folder.
            dataset_name_parts = dataset_name.split('/')
            newInstFolder = '_'.join([dataset_name_parts[0], 'cleaned'])
            dataset_name_parts.remove(dataset_name_parts[0])
            flagged_dataset_name = '/'.join(dataset_name_parts)

            dataset_dict = {'attributes': {},
                            'name': flagged_dataset_name,
                            'data': utils.convert_dataframe_to_np_structured_array(dataset_df)}

            dataset_dict['attributes'].update({'creation_date': utils.created_at().encode('utf-8')})
            dataset_dict['attributes'].update(dataset_obj.attrs)

            DataOpsManager.append_dataset(dataset_dict, newInstFolder)

        except Exception as e:
            self._data_ops_obj.unload_file_obj()
            print(f"An unexpected error occurred: {e}"
                  "The file object has been properly closed.")
- flagged_dataset_name = '/'.join(dataset_name_parts) - - dataset_dict = {'attributes':{}, - 'name':flagged_dataset_name, - 'data': utils.convert_dataframe_to_np_structured_array(dataset_df)} - - dataset_dict['attributes'].update({'creation_date':utils.created_at().encode('utf-8')}) - dataset_dict['attributes'].update(dataset_obj.attrs) - - - DataOpsManager.append_dataset(dataset_dict, newInstFolder) - - except Exception as e: - self._data_ops_obj.unload_file_obj() - print(f"An unexpected error occurred: {e}" - "The file object has been properly closed.") - - - - - #flag_indicator[t1_idx:t2_idx] = True - #(datetime_var_data-t1).min() - - #if not instrument_name in file_obj and not flag_name in file_obj: - # raise ValueError(f'Invalid instrument_name {instrument_name} and flag_name {flag_name}. No object with such names in file {self.file_path}') - #if not f'{instrument_name}_flags': - # raise ValueError(f'There is no flags to work with. Make sure {instrument_name}_flags is created first before running this method.') - + +import dima.src.hdf5_ops as h5de +from plotly.subplots import make_subplots +import plotly.graph_objs as go +import base64 +import os + +import numpy as np +import pandas as pd +import dima.utils.g5505_utils as utils + +UPLOAD_DIRECTORY = 'data/' + +flags_dict = { + "000" : {"flag_label": 'V', "flag_description": "Valid measurement"}, + "100" : {"flag_label": 'V', "flag_description": "Checked by data originator. Valid measurement, overrides any invalid flags"}, + "110" : {"flag_label": 'V', "flag_description": "Episode data checked and accepted by data originator. Valid measurement"}, + "111" : {"flag_label": 'V', "flag_description": "Irregular data checked and accepted by data originator. 
Valid measurement"}, + "456" : {"flag_label": 'I', "flag_description": "Invalidated by data originator"}, + "460" : {"flag_label": 'I', "flag_description": "Contamination suspected"}, + "559" : {"flag_label": 'V', "flag_description": "Unspecified contamination or local influence, but considered valid"}, + "599" : {"flag_label": 'I', "flag_description": "Unspecified contamination or local influence"}, + "652" : {"flag_label": 'V', "flag_description": "construction/activity nearby"}, + "659" : {"flag_label": 'I', "flag_description": "Unspecified instrument/sampling anomaly"}, + "660" : {"flag_label": 'V', "flag_description": "Unspecified instrument/sampling anomaly"}, + "999" : {"flag_label": 'I', "flag_description": "Missing measurement, unspecified reason"} + } + +def save_file(name, content): + # Decode the content and save the file + content_type, content_string = content.split(',') + decoded = base64.b64decode(content_string) + file_path = os.path.join(UPLOAD_DIRECTORY, name) + if not os.path.exists(file_path): + with open(file_path, "wb") as f: + f.write(decoded) + print(f"File saved successfully at {file_path}") + return file_path + else: + print(f'File already exists at {file_path}.\nTo maintain the integrity of the existing file, it will not be overwritten.') + return file_path + +def filter_flags_by_label(flags_dict, label): + """ + Filters the flags dictionary by the specified label. + + Parameters: + ----------- + flags_dict (dict): The dictionary containing flags. + label (str): The label to filter by ('I' or 'V'). + + Returns: + -------- + list: A list of dictionaries with 'label' and 'value' for the specified label. 
+ """ + return [{'label': value['flag_description'], 'value': code} + for code, value in flags_dict.items() if value['flag_label'] == label] + + +def create_loaded_file_figure(file_path, instFolder, dataset_name, datetime_var, datetime_var_format, variables): + + DataOpsAPI = h5de.HDF5DataOpsManager(file_path) + + if not DataOpsAPI.file_obj: + DataOpsAPI.load_file_obj() + + #target_channels = DataOpsAPI.file_obj[instfolder].attrs['target_channels']['names'][0].decode().split(',') + #target_loc = DataOpsAPI.file_obj[instfolder].attrs['target_channels']['location'][0].decode() + #diagnostic_channels = DataOpsAPI.file_obj[instfolder].attrs['diagnostic_channels']['names'][0].decode().split(',') + #diagnostic_loc = DataOpsAPI.file_obj[instfolder].attrs['diagnostic_channels']['location'][0].decode() + + #fig = make_subplots(rows=(len(target_channels+diagnostic_channels)-2), cols=1, shared_xaxes=True, + # row_heights = [1 for i in range(len(target_channels+diagnostic_channels)-2)]) + fig = make_subplots(rows=(len(variables)), cols=1, + row_heights = [1 for i in range(len(variables))]) + traces = [] + trace_idx = 1 + dataset = DataOpsAPI.file_obj[dataset_name] + time_column = DataOpsAPI.reformat_datetime_column(dataset_name, + datetime_var, + datetime_var_format) + + #time_column = dataset[datetime_var][:] + for i in range(1,len(variables)): + + fig.add_trace(go.Scatter(x = time_column, + y = dataset[variables[i]][:], + mode = 'lines', + name = variables[i]), row=trace_idx, col=1) + fig.update_yaxes(title_text= variables[i], row=trace_idx, col=1) + trace_idx = trace_idx + 1 + + #dataset = DataOpsAPI.file_obj[diagnostic_loc] + #time_column = DataOpsAPI.reformat_datetime_column(diagnostic_loc,diagnostic_channels[0],'%d.%m.%Y %H:%M:%S') + #for i in range(1,len(diagnostic_channels)): + + # fig.add_trace(go.Scatter(x = time_column, + # y = dataset[diagnostic_channels[i]][:], + # mode = 'lines', + # name = diagnostic_channels[i]), row=trace_idx, col=1) + # 
fig.update_yaxes(title_text= diagnostic_channels[i], row=trace_idx, col=1, type="log") + # trace_idx = trace_idx + 1 + + fig.update_layout(height=1200, title_text=f"{instFolder} : Target and Diagnostic Channels", showlegend=False) + + DataOpsAPI.unload_file_obj() + #target_channels.remove(target_channels[0]) + #diagnostic_channels.remove(diagnostic_channels[0]) + return fig , [','.join([item,dataset_name]) for item in variables] #+ [','.join([item,diagnostic_loc]) for item in diagnostic_channels] + +#import os +import json +import h5py + +def load_flags(flagFolderPath, dry_run : bool = False): #filePath, instFolder, dry_run : bool = False): + """ + Returns a list of flags (dictionaries) based on the provided filePath and instFolder. + + Parameters: + ----------- + filePath (str): The path to the uploaded file, expected to have an .h5 extension. + instFolder (str): The name of the instrument folder, which must exist as a group in the HDF5 file. + dry_run (bool): If True, performs all operations except loading file contents. + + Returns: + -------- + list: A list of dictionaries containing flag data (or file paths in dry_run mode), + or None if conditions are not met. 
+ """ + + # Return None if the flags folder does not exist + if not os.path.exists(flagFolderPath): + return None + + # List files in the flags folder + files = [os.path.join(flagFolderPath, f) for f in os.listdir(flagFolderPath)] + + # If no files found, return None + if not files: + return None + + # Sort files by creation time + sortedFiles = sorted(files, key=os.path.getctime) + + if dry_run: + print(f"Dry run: Found {len(sortedFiles)} files in the flags folder:") + for filePath in sortedFiles: + print(f" - {filePath}") + return sortedFiles # Return file paths in dry run mode + + # Process and load JSON files + flagDataList = [] + for filePath in sortedFiles: + if filePath.endswith('.json'): + try: + with open(filePath, 'r') as file: + flagDataList.append(json.load(file)) + except (json.JSONDecodeError, FileNotFoundError) as e: + print(f"Error loading file {filePath}: {e}") + continue # Skip invalid or missing files + + return flagDataList + +class FlaggingAppDataManager(): + + def __init__(self, file_path, mode = 'r+') -> None: + + self.file_path = file_path + self.mode = mode + self._data_ops_obj = None + self.file_obj = None + self.datasets_metadata_df = None + + return None + + def load_file_obj(self): + self._data_ops_obj = h5de.HDF5DataOpsManager(self.file_path, self.mode) + self._data_ops_obj.load_file_obj() + self.file_obj = self._data_ops_obj.file_obj + + def unload_file_obj(self): + self._data_ops_obj = h5de.HDF5DataOpsManager(self.file_path, self.mode) + self._data_ops_obj.unload_file_obj() # sets __data_ops_obj.file_obj to None + + def transfer_flags(self): + + if self.file_obj is None: + raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.") + + path_to_append_dir, ext = os.path.splitext(self.file_path) + self._data_ops_obj.update_file(path_to_append_dir) + + + def apply_flags(self,instFolder): + + # TODO: apply flags to diagnostic and indivial channels. 
so far is all channels are cleaned
+
+        if self.file_obj is None:
+            raise RuntimeError("File object is not loaded. Please load the HDF5 file using the 'load_file_obj' method before attempting to modify it.")
+
+        DataOpsManager = self._data_ops_obj
+        file_obj = self.file_obj
+
+        #with h5py.File(self.file_path, mode = self.mode, track_order=True) as file_obj:
+        try:
+
+            if not instFolder in file_obj:
+                raise ValueError(f'Invalid instrument name. Instrument folder {instFolder} was not found in file {self.file_path}.')
+
+            if '_'.join([instFolder,'flags']) not in file_obj:
+                raise RuntimeWarning('There are no flags to apply.')
+
+            if not ('diagnostic_channels' in file_obj[instFolder].attrs and 'target_channels' in file_obj[instFolder].attrs):
+                raise ValueError(
+                    f'Required attributes missing. Instrument folder {instFolder} in file {self.file_path} has to be annotated with '
+                    'attributes "diagnostic_channels" and "target_channels" that specify channels location and their names.'
+                )
+
+            dataset_name = file_obj[instFolder].attrs['target_channels']['location'][0].decode()
+            channel_names = file_obj[instFolder].attrs['target_channels']['names'][0].decode().split(',')
+
+            dataset_obj = file_obj[dataset_name]
+            # TODO: maybe we can do this directly on dataset = dataset_obj[...], which is a structured numpy array, instead of wrapping that as dataframe
+            dataset_df = DataOpsManager.extract_dataset_as_dataframe(dataset_name)
+
+            # Define datetime variable based on channel names. We assume by design the first entry of the list is the datetime variable name.
+ datetime_var = channel_names[0] + remaining_vars = channel_names.copy() + remaining_vars.remove(datetime_var) + + ref_datetime_format = dataset_obj.attrs.get(datetime_var,None)['datetime_format'][0].decode() + + #datetime_var_data = pd.Series([item.decode() for item in dataset_obj[datetime_var]]) + #datetime_var_data = pd.to_datetime(datetime_var_data , format = ref_datetime_format, errors = 'coerce') + dataset_df[datetime_var] = dataset_df[datetime_var].apply(lambda x: x.decode() ) + dataset_df[datetime_var] = pd.to_datetime(dataset_df[datetime_var], format = ref_datetime_format, errors = 'coerce') + + flag_indicator = np.zeros(shape = dataset_df[datetime_var].shape, + dtype = bool) + + # TODO: include this information as part of the flag's attributes in the flag recording process + flag_datetime_format='%Y-%m-%d %H:%M:%S.%f' + for flag in file_obj[f'{instFolder}_flags']: + flag_obj = file_obj[f'{instFolder}_flags'][flag]['data_table'] + + # Replace values indicated by flag NaN if flag label refers to invalidated data. 
+            if flag_obj['flag_code'][0].decode() == 'None':
+                flag_label = ''
+            else:
+                flag_label = flag_obj['flag_label'][0].decode()
+
+            if flag_label == 'I':
+                t1 = pd.to_datetime(flag_obj['startdate'][0].decode(), format=flag_datetime_format)
+                t2 = pd.to_datetime(flag_obj['enddate'][0].decode(), format=flag_datetime_format)
+
+                t1_idx = abs(dataset_df[datetime_var]-t1).argmin()
+                t2_idx = abs(dataset_df[datetime_var]-t2).argmin()
+
+                dataset_df.loc[t1_idx:t2_idx,remaining_vars] = np.nan
+
+
+            # Apply the .strftime() method, handling NaT values by filling with an empty string or placeholder
+            dataset_df[datetime_var] = dataset_df[datetime_var].apply(
+                lambda x: x.strftime(ref_datetime_format).encode('utf-8') if not pd.isnull(x) else b'' # Handle NaT/NaN by returning empty string
+            )
+
+
+            # Split full datasetname instFolder/fileName/datatable --> [instFolder, filename, datatable]
+            dataset_name_parts = dataset_name.split('/')
+            # Create new instFolder name to store dataset after applying flags
+            newInstFolder = '_'.join([dataset_name_parts[0],'cleaned'])
+            dataset_name_parts.remove(dataset_name_parts[0])
+            # Put together relative datasetname. Note that instFolder is now missing.
+            flagged_dataset_name = '/'.join(dataset_name_parts)
+
+            dataset_dict = {'attributes':{},
+                            'name':flagged_dataset_name,
+                            'data': utils.convert_dataframe_to_np_structured_array(dataset_df)}
+
+            dataset_dict['attributes'].update({'creation_date':utils.created_at().encode('utf-8')})
+            dataset_dict['attributes'].update(dataset_obj.attrs)
+
+
+            DataOpsManager.append_dataset(dataset_dict, newInstFolder)
+
+        except Exception as e:
+            self._data_ops_obj.unload_file_obj()
+            print(f"An unexpected error occurred: {e}. "
+                  "The file object has been properly closed.")
+
+
+
+
+        #flag_indicator[t1_idx:t2_idx] = True
+        #(datetime_var_data-t1).min()
+
+        #if not instrument_name in file_obj and not flag_name in file_obj:
+        #    raise ValueError(f'Invalid instrument_name {instrument_name} and flag_name {flag_name}. No object with such names in file {self.file_path}')
+        #if not f'{instrument_name}_flags':
+        #    raise ValueError(f'There is no flags to work with. Make sure {instrument_name}_flags is created first before running this method.')
+