<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_ops &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_ops</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_ops</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error: __file__ is not available. Ensure the script is being run from a file.&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;[Notice] Path to DIMA package may not be resolved properly.&quot;</span><span class="p">)</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span> <span class="c1"># Use current directory or specify a default</span>
<span class="n">dimaPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">thisFilePath</span><span class="p">,</span> <span class="s2">&quot;..&quot;</span><span class="p">,</span><span class="s1">&#39;..&#39;</span><span class="p">))</span> <span class="c1"># Move up to project root</span>
<span class="k">if</span> <span class="n">dimaPath</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="p">:</span> <span class="c1"># Avoid duplicate entries</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dimaPath</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">utils.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">src.hdf5_writer</span> <span class="k">as</span> <span class="nn">hdf5_lib</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">datetime</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">copy</span>
<div class="viewcode-block" id="HDF5DataOpsManager">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager">[docs]</a>
<span class="k">class</span> <span class="nc">HDF5DataOpsManager</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A class to handle HDF5 fundamental middle level file operations to power data updates, metadata revision, and data analysis</span>
<span class="sd"> with hdf5 files encoding multi-instrument experimental campaign data.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> path_to_file : str</span>
<span class="sd"> path/to/hdf5file.</span>
<span class="sd"> mode : str</span>
<span class="sd"> &#39;r&#39; or &#39;r+&#39; read or read/write mode only when file exists</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_path</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="s1">&#39;r+&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># Class attributes</span>
<span class="k">if</span> <span class="n">mode</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;r&#39;</span><span class="p">,</span><span class="s1">&#39;r+&#39;</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">mode</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_path</span> <span class="o">=</span> <span class="n">file_path</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1">#self._open_file()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Define private methods </span>
<span class="c1"># Define public methods</span>
<div class="viewcode-block" id="HDF5DataOpsManager.load_file_obj">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.load_file_obj">[docs]</a>
<span class="k">def</span> <span class="nf">load_file_obj</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.unload_file_obj">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj">[docs]</a>
<span class="k">def</span> <span class="nf">unload_file_obj</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> <span class="c1"># Ensure all data is written to disk</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="kc">None</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.extract_and_load_dataset_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">extract_and_load_dataset_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
<span class="n">list_of_datasets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="c1">#print(f&#39;Adding dataset: {name}&#39;) #tail: {head} head: {tail}&#39;)</span>
<span class="n">list_of_datasets</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to extract datasets.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">list_of_datasets</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="k">lambda</span> <span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">:</span> <span class="n">__get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">))</span>
<span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">:</span> <span class="n">list_of_datasets</span><span class="p">})</span>
<span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;parent_instrument&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">3</span><span class="p">])</span>
<span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;parent_file&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">2</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. File object will be unloaded.&quot;</span><span class="p">)</span> </div>
<div class="viewcode-block" id="HDF5DataOpsManager.extract_dataset_as_dataframe">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">extract_dataset_as_dataframe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> returns a copy of the dataset content in the form of dataframe when possible or numpy array </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to extract datasets.&quot;</span><span class="p">)</span>
<span class="n">dataset_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="c1"># Read dataset content from dataset obj</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">dataset_obj</span><span class="p">[</span><span class="o">...</span><span class="p">]</span>
<span class="c1"># The above statement can be understood as follows: </span>
<span class="c1"># data = np.empty(shape=dataset_obj.shape, </span>
<span class="c1"># dtype=dataset_obj.dtype)</span>
<span class="c1"># dataset_obj.read_direct(data)</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to convert dataset &#39;</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s2">&#39; to DataFrame: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. Instead, dataset will be returned as Numpy array.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">data</span> <span class="c1"># &#39;data&#39; is a NumPy array here</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. Returning None and unloading file object&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span></div>
<span class="c1"># Define metadata revision methods: append(), update(), delete(), and rename().</span>
<div class="viewcode-block" id="HDF5DataOpsManager.append_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">append_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Appends metadata attributes to the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> This method ensures that the provided metadata attributes do not overwrite any existing ones. If an attribute already exists, </span>
<span class="sd"> a ValueError is raised. The function supports storing scalar values (int, float, str) and compound values such as dictionaries </span>
<span class="sd"> that are converted into NumPy structured arrays before being added to the metadata.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> A dictionary where the keys represent new attribute names (strings), and the values can be:</span>
<span class="sd"> - Scalars: int, float, or str.</span>
<span class="sd"> - Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays. </span>
<span class="sd"> Example of a compound value:</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> ----------</span>
<span class="sd"> annotation_dict = {</span>
<span class="sd"> &quot;relative_humidity&quot;: {</span>
<span class="sd"> &quot;value&quot;: 65,</span>
<span class="sd"> &quot;units&quot;: &quot;percentage&quot;,</span>
<span class="sd"> &quot;range&quot;: &quot;[0,100]&quot;,</span>
<span class="sd"> &quot;definition&quot;: &quot;amount of water vapor present ...&quot;</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="c1"># Create a copy of annotation_dict to avoid modifying the original</span>
<span class="n">annotation_dict_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">annotation_dict</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="c1"># Check if any attribute already exists</span>
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">annotation_dict_copy</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()&quot;</span><span class="p">)</span>
<span class="c1"># Process the dictionary values and convert them to structured arrays if needed</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict_copy</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="c1"># Convert dictionaries to NumPy structured arrays for complex attributes</span>
<span class="n">annotation_dict_copy</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="c1"># Update the object&#39;s attributes with the new metadata</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">annotation_dict_copy</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.update_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">update_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Updates the value of existing metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> The function disregards non-existing attributes and suggests to use the append_metadata() method to include those in the metadata.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name : str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> A dictionary where the keys represent existing attribute names (strings), and the values can be:</span>
<span class="sd"> - Scalars: int, float, or str.</span>
<span class="sd"> - Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays. </span>
<span class="sd"> Example of a compound value:</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> ----------</span>
<span class="sd"> annotation_dict = {</span>
<span class="sd"> &quot;relative_humidity&quot;: {</span>
<span class="sd"> &quot;value&quot;: 65,</span>
<span class="sd"> &quot;units&quot;: &quot;percentage&quot;,</span>
<span class="sd"> &quot;range&quot;: &quot;[0,100]&quot;,</span>
<span class="sd"> &quot;definition&quot;: &quot;amount of water vapor present ...&quot;</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="n">update_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">update_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">update_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Optionally, log or warn about non-existing keys being ignored.</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Warning: Key &#39;</span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">&#39; does not exist and will be ignored.&quot;</span><span class="p">)</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">update_dict</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.delete_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.delete_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">delete_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deletes metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> </span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> Dictionary where keys represent attribute names, and values should be dictionaries containing </span>
<span class="sd"> {&quot;delete&quot;: True} to mark them for deletion.</span>
<span class="sd"> Example:</span>
<span class="sd"> --------</span>
<span class="sd"> annotation_dict = {&quot;attr_to_be_deleted&quot;: {&quot;delete&quot;: True}}</span>
<span class="sd"> Behavior:</span>
<span class="sd"> ---------</span>
<span class="sd"> - Deletes the specified attributes from the object&#39;s metadata if marked for deletion.</span>
<span class="sd"> - Issues a warning if the attribute is not found or not marked for deletion.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="k">for</span> <span class="n">attr_key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">attr_key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="n">value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;delete&#39;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">attr_key</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Warning: Value for key &#39;</span><span class="si">{</span><span class="n">attr_key</span><span class="si">}</span><span class="s2">&#39; is not marked for deletion or is invalid.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Warning: Key &#39;</span><span class="si">{</span><span class="n">attr_key</span><span class="si">}</span><span class="s2">&#39; does not exist in metadata.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.rename_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.rename_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">rename_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">renaming_map</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Renames metadata attributes of the specified object (obj_name) based on the provided renaming_map.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> renaming_map: dict</span>
<span class="sd"> A dictionary where keys are current attribute names (strings), and values are the new attribute names (strings or byte strings) to rename to.</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> --------</span>
<span class="sd"> renaming_map = {</span>
<span class="sd"> &quot;old_attr_name&quot;: &quot;new_attr_name&quot;,</span>
<span class="sd"> &quot;old_attr_2&quot;: &quot;new_attr_2&quot;</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="c1"># Iterate over the renaming_map to process renaming</span>
<span class="k">for</span> <span class="n">old_attr</span><span class="p">,</span> <span class="n">new_attr</span> <span class="ow">in</span> <span class="n">renaming_map</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">old_attr</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="c1"># Get the old attribute&#39;s value</span>
<span class="n">attr_value</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">old_attr</span><span class="p">]</span>
<span class="c1"># Create a new attribute with the new name</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">new_attr</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">attr_value</span><span class="p">)</span>
<span class="c1"># Delete the old attribute</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">old_attr</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Skip if the old attribute doesn&#39;t exist</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Skipping: Attribute &#39;</span><span class="si">{</span><span class="n">old_attr</span><span class="si">}</span><span class="s2">&#39; does not exist.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> <span class="c1"># Optionally, replace with warnings.warn(msg)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed. &quot;</span>
<span class="s2">&quot;Please ensure that &#39;obj_name&#39; exists in the file, and that the keys in &#39;renaming_map&#39; are valid attributes of the object.&quot;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.get_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.get_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">get_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Get file attributes from object at path = obj_path. For example,</span>
<span class="sd"> obj_path = &#39;/&#39; will get root level attributes or metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Access the attributes for the object at the given path</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_path</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span>
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
<span class="c1"># Handle the case where the path doesn&#39;t exist</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Invalid object path: </span><span class="si">{</span><span class="n">obj_path</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">return</span> <span class="n">metadata_dict</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.reformat_datetime_column">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column">[docs]</a>
<span class="k">def</span> <span class="nf">reformat_datetime_column</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">src_format</span><span class="p">,</span> <span class="n">desired_format</span><span class="o">=</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H:%M:%S.</span><span class="si">%f</span><span class="s1">&#39;</span><span class="p">):</span>
<span class="c1"># Access the dataset</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="c1"># Read the column data into a pandas Series and decode bytes to strings</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="n">column_name</span><span class="p">][:])</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> <span class="p">)</span>
<span class="c1"># Convert to datetime using the source format</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">dt_column_data</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="n">src_format</span><span class="p">,</span> <span class="n">errors</span> <span class="o">=</span> <span class="s1">&#39;coerce&#39;</span><span class="p">)</span>
<span class="c1"># Reformat datetime objects to the desired format as strings</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">dt_column_data</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">desired_format</span><span class="p">)</span>
<span class="c1"># Encode the strings back to bytes</span>
<span class="c1">#encoded_data = dt_column_data.apply(lambda x: x.encode() if not pd.isnull(x) else &#39;N/A&#39;).to_numpy()</span>
<span class="c1"># Update the dataset in place</span>
<span class="c1">#dataset[column_name][:] = encoded_data</span>
<span class="c1"># Convert byte strings to datetime objects</span>
<span class="c1">#timestamps = [datetime.datetime.strptime(a.decode(), src_format).strftime(desired_format) for a in dt_column_data] </span>
<span class="c1">#datetime.strptime(&#39;31/01/22 23:59:59.999999&#39;,</span>
<span class="c1"># &#39;%d/%m/%y %H:%M:%S.%f&#39;)</span>
<span class="c1">#pd.to_datetime(</span>
<span class="c1"># np.array([a.decode() for a in dt_column_data]),</span>
<span class="c1"># format=src_format,</span>
<span class="c1"># errors=&#39;coerce&#39;</span>
<span class="c1">#)</span>
<span class="c1"># Standardize the datetime format</span>
<span class="c1">#standardized_time = datetime.strftime(desired_format)</span>
<span class="c1"># Convert to byte strings to store back in the HDF5 dataset</span>
<span class="c1">#standardized_time_bytes = np.array([s.encode() for s in timestamps])</span>
<span class="c1"># Update the column in the dataset (in-place update)</span>
<span class="c1"># TODO: make this a more secure operation</span>
<span class="c1">#dataset[column_name][:] = standardized_time_bytes</span>
<span class="c1">#return np.array(timestamps)</span>
<span class="k">return</span> <span class="n">dt_column_data</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span></div>
<span class="c1"># Define data append operations: append_dataset(), and update_file()</span>
<div class="viewcode-block" id="HDF5DataOpsManager.append_dataset">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_dataset">[docs]</a>
<span class="k">def</span> <span class="nf">append_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">dataset_dict</span><span class="p">,</span> <span class="n">group_name</span><span class="p">):</span>
<span class="c1"># Parse value into HDF5 admissible type</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="n">key</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">group_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">group_name</span><span class="p">,</span> <span class="n">track_order</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;creation_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">create_dataset</span><span class="p">(</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">],</span> <span class="n">data</span><span class="o">=</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">][</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]]</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;last_update_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.update_file">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_file">[docs]</a>
<span class="k">def</span> <span class="nf">update_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_to_append_dir</span><span class="p">):</span>
<span class="c1"># Split the reference file path and the append directory path into directories and filenames</span>
<span class="n">ref_tail</span><span class="p">,</span> <span class="n">ref_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="p">)</span>
<span class="n">ref_head_filename</span><span class="p">,</span> <span class="n">head_ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">ref_head</span><span class="p">)</span>
<span class="n">tail</span><span class="p">,</span> <span class="n">head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">path_to_append_dir</span><span class="p">)</span>
<span class="c1"># Ensure the append directory is in the same directory as the reference file and has the same name (without extension)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">ref_tail</span> <span class="o">==</span> <span class="n">tail</span> <span class="ow">and</span> <span class="n">ref_head_filename</span> <span class="o">==</span> <span class="n">head</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;The append directory must be in the same directory as the reference HDF5 file and have the same name without the extension.&quot;</span><span class="p">)</span>
<span class="c1"># Close the file if it&#39;s already open</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="c1"># Attempt to open the file in &#39;r+&#39; mode for appending</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">hdf5_lib</span><span class="o">.</span><span class="n">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">path_to_append_dir</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;r+&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Reference HDF5 file &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="si">}</span><span class="s2">&#39; not found.&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">OSError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Error opening HDF5 file: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="get_parent_child_relationships">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.get_parent_child_relationships">[docs]</a>
<span class="k">def</span> <span class="nf">get_parent_child_relationships</span><span class="p">(</span><span class="n">file</span><span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">):</span>
<span class="n">nodes</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;/&#39;</span><span class="p">]</span>
<span class="n">parent</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;&#39;</span><span class="p">]</span>
<span class="c1">#values = [file.attrs[&#39;count&#39;]]</span>
<span class="c1"># TODO: maybe we should make this more general and not dependent on file_list attribute? </span>
<span class="c1">#if &#39;file_list&#39; in file.attrs.keys():</span>
<span class="c1"># values = [len(file.attrs[&#39;file_list&#39;])]</span>
<span class="c1">#else:</span>
<span class="c1"># values = [1]</span>
<span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">keys</span><span class="p">())]</span>
<span class="k">def</span> <span class="nf">node_visitor</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="n">obj</span><span class="p">):</span>
<span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">&lt;=</span><span class="mi">2</span><span class="p">:</span>
<span class="n">nodes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">parent</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">parent</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="c1">#nodes.append(os.path.split(obj.name)[1])</span>
<span class="c1">#parent.append(os.path.split(obj.parent.name)[1])</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span><span class="c1"># or not &#39;file_list&#39; in obj.attrs.keys():</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
<span class="k">except</span><span class="p">:</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">file</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="n">node_visitor</span><span class="p">)</span>
<span class="k">return</span> <span class="n">nodes</span><span class="p">,</span> <span class="n">parent</span><span class="p">,</span> <span class="n">values</span> </div>
<span class="k">def</span> <span class="nf">__print_metadata__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Extracts metadata from HDF5 groups and datasets and organizes them into a dictionary with compact representation.</span>
<span class="sd"> </span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> name (str): Name of the HDF5 object being inspected.</span>
<span class="sd"> obj (h5py.Group or h5py.Dataset): The HDF5 object (Group or Dataset).</span>
<span class="sd"> folder_depth (int): Maximum depth of folders to explore.</span>
<span class="sd"> yaml_dict (dict): Dictionary to populate with metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Process only objects within the specified folder depth</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="n">folder_depth</span><span class="p">:</span> <span class="c1"># and &quot;.h5&quot; not in obj.name:</span>
<span class="n">name_to_list</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="n">name_head</span> <span class="o">=</span> <span class="n">name_to_list</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">name_to_list</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">==</span><span class="s1">&#39;&#39;</span> <span class="k">else</span> <span class="n">obj</span><span class="o">.</span><span class="n">name</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">h5py</span><span class="o">.</span><span class="n">Group</span><span class="p">):</span> <span class="c1"># Handle groups</span>
<span class="c1"># Convert attributes to a YAML/JSON serializable format</span>
<span class="n">attr_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">to_serializable_dtype</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="c1"># Initialize the group dictionary</span>
<span class="n">group_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">name_head</span><span class="p">,</span> <span class="s2">&quot;attributes&quot;</span><span class="p">:</span> <span class="n">attr_dict</span><span class="p">}</span>
<span class="c1"># Handle group members compactly</span>
<span class="c1">#subgroups = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Group)]</span>
<span class="c1">#datasets = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Dataset)]</span>
<span class="c1"># Summarize groups and datasets</span>
<span class="c1">#group_dict[&quot;content_summary&quot;] = {</span>
<span class="c1"># &quot;group_count&quot;: len(subgroups),</span>
<span class="c1"># &quot;group_preview&quot;: subgroups[:3] + ([&quot;...&quot;] if len(subgroups) &gt; 3 else []),</span>
<span class="c1"># &quot;dataset_count&quot;: len(datasets),</span>
<span class="c1"># &quot;dataset_preview&quot;: datasets[:3] + ([&quot;...&quot;] if len(datasets) &gt; 3 else [])</span>
<span class="c1">#}</span>
<span class="n">yaml_dict</span><span class="p">[</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">group_dict</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span> <span class="c1"># Handle datasets</span>
<span class="c1"># Convert attributes to a YAML/JSON serializable format</span>
<span class="n">attr_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">to_serializable_dtype</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="n">dataset_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">name_head</span><span class="p">,</span> <span class="s2">&quot;attributes&quot;</span><span class="p">:</span> <span class="n">attr_dict</span><span class="p">}</span>
<span class="n">yaml_dict</span><span class="p">[</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_dict</span>
<div class="viewcode-block" id="serialize_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.serialize_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">serialize_metadata</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">output_format</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;yaml&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Serialize metadata from an HDF5 file into YAML or JSON format.</span>
<span class="sd"> </span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> input_filename_path : str</span>
<span class="sd"> The path to the input HDF5 file.</span>
<span class="sd"> folder_depth : int, optional</span>
<span class="sd"> The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).</span>
<span class="sd"> output_format : str, optional</span>
<span class="sd"> The format to serialize the output, either &#39;yaml&#39; or &#39;json&#39; (default is &#39;yaml&#39;).</span>
<span class="sd"> </span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> str</span>
<span class="sd"> The output file path where the serialized metadata is stored (either .yaml or .json).</span>
<span class="sd"> </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Choose the appropriate output format (YAML or JSON)</span>
<span class="k">if</span> <span class="n">output_format</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;yaml&#39;</span><span class="p">,</span> <span class="s1">&#39;json&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Unsupported format. Please choose either &#39;yaml&#39; or &#39;json&#39;.&quot;</span><span class="p">)</span>
<span class="c1"># Initialize dictionary to store YAML/JSON data</span>
<span class="n">yaml_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="c1"># Split input file path to get the output file&#39;s base name</span>
<span class="n">output_filename_tail</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">)</span>
<span class="c1"># Open the HDF5 file and extract metadata</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="c1"># Convert attribute dict to a YAML/JSON serializable dict</span>
<span class="c1">#attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()} </span>
<span class="c1">#yaml_dict[f.name] = {</span>
<span class="c1"># &quot;name&quot;: f.name,</span>
<span class="c1"># &quot;attributes&quot;: attrs_dict,</span>
<span class="c1"># &quot;datasets&quot;: {}</span>
<span class="c1">#}</span>
<span class="n">__print_metadata__</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">)</span>
<span class="c1"># Traverse HDF5 file hierarchy and add datasets</span>
<span class="n">f</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="k">lambda</span> <span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">:</span> <span class="n">__print_metadata__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">))</span>
<span class="c1"># Serialize and write the data</span>
<span class="n">output_file_path</span> <span class="o">=</span> <span class="n">output_filename_tail</span> <span class="o">+</span> <span class="s1">&#39;.&#39;</span> <span class="o">+</span> <span class="n">output_format</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_path</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">output_file</span><span class="p">:</span>
<span class="k">if</span> <span class="n">output_format</span> <span class="o">==</span> <span class="s1">&#39;json&#39;</span><span class="p">:</span>
<span class="n">json_output</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">yaml_dict</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">json_output</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">output_format</span> <span class="o">==</span> <span class="s1">&#39;yaml&#39;</span><span class="p">:</span>
<span class="n">yaml_output</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">yaml_dict</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">yaml_output</span><span class="p">)</span>
<span class="k">return</span> <span class="n">output_file_path</span></div>
<div class="viewcode-block" id="get_groups_at_a_level">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.get_groups_at_a_level">[docs]</a>
<span class="k">def</span> <span class="nf">get_groups_at_a_level</span><span class="p">(</span><span class="n">file</span><span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">groups</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">def</span> <span class="nf">node_selector</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
<span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="n">level</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">groups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">file</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="n">node_selector</span><span class="p">)</span>
<span class="c1">#file.visititems()</span>
<span class="k">return</span> <span class="n">groups</span></div>
<div class="viewcode-block" id="read_mtable_as_dataframe">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.read_mtable_as_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">read_mtable_as_dataframe</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reconstruct a MATLAB Table encoded in a .h5 file as a Pandas DataFrame.</span>
<span class="sd"> This function reads a .h5 file containing a MATLAB Table and reconstructs it as a Pandas DataFrame. </span>
<span class="sd"> The input .h5 file contains one group per row of the MATLAB Table. Each group stores the table&#39;s </span>
<span class="sd"> dataset-like variables as Datasets, while categorical and numerical variables are represented as </span>
<span class="sd"> attributes of the respective group.</span>
<span class="sd"> To ensure homogeneity of data columns, the DataFrame is constructed column-wise.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> filename : str</span>
<span class="sd"> The name of the .h5 file. This may include the file&#39;s location and path information.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pd.DataFrame</span>
<span class="sd"> The MATLAB Table reconstructed as a Pandas DataFrame.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1">#contructs dataframe by filling out entries columnwise. This way we can ensure homogenous data columns&quot;&quot;&quot;</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="c1"># Define group&#39;s attributes and datasets. This should hold</span>
<span class="c1"># for all groups. TODO: implement verification and noncompliance error if needed.</span>
<span class="n">group_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">group_attrs</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="c1"># </span>
<span class="n">column_attr_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">::]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">]</span>
<span class="n">column_attr_names_idx</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">4</span><span class="p">:(</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">))])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">]</span>
<span class="n">group_datasets</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;DS_EMPTY&#39;</span> <span class="ow">in</span> <span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="k">else</span> <span class="p">[]</span>
<span class="c1">#</span>
<span class="n">column_dataset_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]][</span><span class="n">item</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;column_name&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_datasets</span><span class="p">]</span>
<span class="n">column_dataset_names_idx</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">2</span><span class="p">:])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_datasets</span><span class="p">]</span>
<span class="c1"># Define data_frame as group_attrs + group_datasets</span>
<span class="c1">#pd_series_index = group_attrs + group_datasets</span>
<span class="n">pd_series_index</span> <span class="o">=</span> <span class="n">column_attr_names</span> <span class="o">+</span> <span class="n">column_dataset_names</span>
<span class="n">output_dataframe</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">pd_series_index</span><span class="p">,</span><span class="n">index</span><span class="o">=</span><span class="n">group_list</span><span class="p">)</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">meas_prop</span> <span class="ow">in</span> <span class="n">group_attrs</span> <span class="o">+</span> <span class="n">group_datasets</span><span class="p">:</span>
<span class="k">if</span> <span class="n">meas_prop</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">:</span>
<span class="n">column_label</span> <span class="o">=</span> <span class="n">meas_prop</span><span class="p">[</span><span class="n">meas_prop</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">:]</span>
<span class="c1"># Create numerical or categorical column from group&#39;s attributes</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_key</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">meas_prop</span><span class="p">][()][</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">group_key</span> <span class="ow">in</span> <span class="n">group_list</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Create dataset column from group&#39;s datasets</span>
<span class="n">column_label</span> <span class="o">=</span> <span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">meas_prop</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;column_name&#39;</span><span class="p">]</span>
<span class="c1">#tmp_col = [file[group_key + &#39;/&#39; + meas_prop][()][0] for group_key in group_list]</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_key</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">meas_prop</span><span class="p">][()]</span> <span class="k">for</span> <span class="n">group_key</span> <span class="ow">in</span> <span class="n">group_list</span><span class="p">]</span>
<span class="n">output_dataframe</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span><span class="n">column_label</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp_col</span>
<span class="k">return</span> <span class="n">output_dataframe</span></div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">5</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Usage: python hdf5_ops.py serialize &lt;path/to/target_file.hdf5&gt; &lt;folder_depth : int = 2&gt; &lt;format=json|yaml&gt;&quot;</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;serialize&#39;</span><span class="p">:</span>
<span class="n">input_hdf5_file</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
<span class="n">folder_depth</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">])</span>
<span class="n">file_format</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Call the serialize_metadata function and capture the output path</span>
<span class="n">path_to_file</span> <span class="o">=</span> <span class="n">serialize_metadata</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span>
<span class="n">folder_depth</span> <span class="o">=</span> <span class="n">folder_depth</span><span class="p">,</span>
<span class="n">output_format</span><span class="o">=</span><span class="n">file_format</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Metadata serialized to </span><span class="si">{</span><span class="n">path_to_file</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An error occurred during serialization: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="c1">#run(sys.argv[2])</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>