diff --git a/workflow_data_integration.ipynb b/workflow_data_integration.ipynb index 45dc92c..8b2a0f9 100644 --- a/workflow_data_integration.ipynb +++ b/workflow_data_integration.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -20,16 +20,8 @@ "sys.path.append(root_dir)\n", "\n", "import src.hdf5_vis as hdf5_vis\n", - "import src.hdf5_lib as hdf5_lib\n", - "import input_files.config_file as config_file\n", - "\n", - "\n", - "output_dir = 'output_files/'\n", - "group_id = '5505'#'smog_chamber'#'5505'\n", - "user_initials = 'LL' #'NG' #'LL' # 'TBR'\n", - "\n", - "group_id = 'smog_chamber'#'5505'\n", - "user_initials = 'NG'#'LL' #'NG' #'LL' # 'TBR'\n" + "import src.data_integration_lib as dilib\n", + "import yaml\n" ] }, { @@ -42,1001 +34,28 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[==================================================--------------------------------------------------] 50.0% ...Uploading files in \\\\fs03\\Iron_Sulphate\\smps\\20220726\r" - ] - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "branchvalues": "remainder", - "customdata": [ - "
", - "/gas", - "/gas/20220726_000004_MSC_gases.txt", - "/gas/20220726_000004_MSC_gases.txt/categorial_variable_names", - "/gas/20220726_000004_MSC_gases.txt/categorical_variables", - "/gas/20220726_000004_MSC_gases.txt/numerical_variable_names", - "/gas/20220726_000004_MSC_gases.txt/numerical_variables", - "/gas/20220726_101617_MSC_gases.txt", - "/gas/20220726_101617_MSC_gases.txt/categorial_variable_names", - "/gas/20220726_101617_MSC_gases.txt/categorical_variables", - "/gas/20220726_101617_MSC_gases.txt/numerical_variable_names", - "/gas/20220726_101617_MSC_gases.txt/numerical_variables", - "/smps", - "/smps/20220726", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726/20220726_mass.TXT/categorial_variable_names", - "/smps/20220726/20220726_mass.TXT/categorical_variables", - "/smps/20220726/20220726_mass.TXT/numerical_variable_names", - "/smps/20220726/20220726_mass.TXT/numerical_variables", - "/smps/20220726/20220726_num.TXT", - "/smps/20220726/20220726_num.TXT/categorial_variable_names", - "/smps/20220726/20220726_num.TXT/categorical_variables", - "/smps/20220726/20220726_num.TXT/numerical_variable_names", - "/smps/20220726/20220726_num.TXT/numerical_variables" - ], - "hovertemplate": "%{label}
Count: %{value}
Path: %{customdata}", - "labels": [ - "/", - "/gas", - "/gas/20220726_000004_MSC_gases.txt", - "/gas/20220726_000004_MSC_gases.txt/categorial_variable_names", - "/gas/20220726_000004_MSC_gases.txt/categorical_variables", - "/gas/20220726_000004_MSC_gases.txt/numerical_variable_names", - "/gas/20220726_000004_MSC_gases.txt/numerical_variables", - "/gas/20220726_101617_MSC_gases.txt", - "/gas/20220726_101617_MSC_gases.txt/categorial_variable_names", - "/gas/20220726_101617_MSC_gases.txt/categorical_variables", - "/gas/20220726_101617_MSC_gases.txt/numerical_variable_names", - "/gas/20220726_101617_MSC_gases.txt/numerical_variables", - "/smps", - "/smps/20220726", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726/20220726_mass.TXT/categorial_variable_names", - "/smps/20220726/20220726_mass.TXT/categorical_variables", - "/smps/20220726/20220726_mass.TXT/numerical_variable_names", - "/smps/20220726/20220726_mass.TXT/numerical_variables", - "/smps/20220726/20220726_num.TXT", - "/smps/20220726/20220726_num.TXT/categorial_variable_names", - "/smps/20220726/20220726_num.TXT/categorical_variables", - "/smps/20220726/20220726_num.TXT/numerical_variable_names", - "/smps/20220726/20220726_num.TXT/numerical_variables" - ], - "name": "", - "parents": [ - "", - "/", - "/gas", - "/gas/20220726_000004_MSC_gases.txt", - "/gas/20220726_000004_MSC_gases.txt", - "/gas/20220726_000004_MSC_gases.txt", - "/gas/20220726_000004_MSC_gases.txt", - "/gas", - "/gas/20220726_101617_MSC_gases.txt", - "/gas/20220726_101617_MSC_gases.txt", - "/gas/20220726_101617_MSC_gases.txt", - "/gas/20220726_101617_MSC_gases.txt", - "/", - "/smps", - "/smps/20220726", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726/20220726_mass.TXT", - "/smps/20220726", - "/smps/20220726/20220726_num.TXT", - "/smps/20220726/20220726_num.TXT", - "/smps/20220726/20220726_num.TXT", - "/smps/20220726/20220726_num.TXT" - ], - "root": { - "color": "lightgrey" - }, - "type": "treemap", - "values": [ - 2, - 2, - 4, - 1, - 1, - 1, - 1, - 4, - 1, - 1, - 1, - 1, - 1, - 2, - 4, - 1, - 1, - 1, - 1, - 4, - 1, - 1, - 1, - 1 - ] - } - ], - "layout": { - "height": 600, - "margin": { - "b": 25, - "l": 25, - "r": 25, - "t": 50 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "width": 800 - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "#input_file_dir = '//fs101/5505/People/Juan/TypicalBeamTime'\n", - "#select_file_keywords=[]\n", - "#select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']\n", - "\n", - "\n", - "#input_file_dir = '//fs101/5505/Data' \n", - "#select_dir_keywords = ['Lopap', 'Humidity_Sensors/2022', 'ICAD/HONO/2022', 'ICAD/NO2/2022', 'T200_NOX', 'T360U_CO2']\n", - "#select_file_keywords = ['2022-03-25','2022_03_25','20220325']\n", - "\n", - "\n", - "input_file_dir = '//fs03/Iron_Sulphate'\n", - "select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']\n", - "#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps'] \n", - "\n", - "select_file_keywords = ['20220726','2022.07.26']\n", - "\n", - "config_param = {'group_id' : group_id, 'user_initials' : user_initials, 'output_dir': output_dir}\n", - "\n", - "\n", - "output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(config_param,\n", - " input_file_dir,\n", - " select_dir_keywords,\n", - " select_file_keywords)\n", - "\n", - "hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)" + "#output_filename_path = 'output_files/unified_file_smog_chamber_2024-04-07_UTC-OFST_+0200_NG.h5'\n", + "yaml_config_file_path = 'input_files/data_integr_config_file_TBR.yaml'\n", + "output_filename_path, output_yml_filename_path = dilib.integrate_data_sources(yaml_config_file_path)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)" + ] } ], "metadata": {