<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>HDF5 Data Operations &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Pipelines and workflows" href="pipelines.html" />
<link rel="prev" title="Welcome to DIMAs documentation!" href="../index.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="current reference internal" href="#">HDF5 Data Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager</span></code></a><ul>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.append_dataset"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.append_dataset()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.append_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.append_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.delete_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.delete_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.extract_and_load_dataset_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.extract_dataset_as_dataframe()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.get_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.get_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.load_file_obj"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.load_file_obj()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.reformat_datetime_column()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.rename_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.rename_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.unload_file_obj()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.update_file"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.update_file()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.update_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.update_metadata()</span></code></a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.get_groups_at_a_level"><code class="docutils literal notranslate"><span class="pre">get_groups_at_a_level()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.get_parent_child_relationships"><code class="docutils literal notranslate"><span class="pre">get_parent_child_relationships()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.read_mtable_as_dataframe"><code class="docutils literal notranslate"><span class="pre">read_mtable_as_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.serialize_metadata"><code class="docutils literal notranslate"><span class="pre">serialize_metadata()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="#module-src.hdf5_writer">HDF5 Writer</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.create_hdf5_file_from_dataframe"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.create_hdf5_file_from_filesystem_path"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_filesystem_path()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.save_processed_dataframe_to_hdf5"><code class="docutils literal notranslate"><span class="pre">save_processed_dataframe_to_hdf5()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="vis.html">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">HDF5 Data Operations</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/src.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-src.hdf5_ops">
<span id="hdf5-data-operations"></span><h1>HDF5 Data Operations<a class="headerlink" href="#module-src.hdf5_ops" title="Link to this heading"></a></h1>
<dl class="py class">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">HDF5DataOpsManager</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'r+'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>A class for fundamental mid-level HDF5 file operations, powering data updates, metadata revision, and data analysis
on HDF5 files that encode multi-instrument experimental campaign data.</p>
<section id="parameters">
<h2>Parameters:<a class="headerlink" href="#parameters" title="Link to this heading"></a></h2>
<blockquote>
<div><dl class="simple">
<dt>file_path<span class="classifier">str</span></dt><dd><p>Path to the HDF5 file, e.g., path/to/file.h5.</p>
</dd>
<dt>mode<span class="classifier">str</span></dt><dd><p>'r' or 'r+': read-only or read/write mode; the file must already exist.</p>
</dd>
</dl>
</div></blockquote>
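<p>A minimal usage sketch follows. The file path is illustrative, and the assumption that <code class="docutils literal notranslate"><span class="pre">load_file_obj()</span></code> and <code class="docutils literal notranslate"><span class="pre">unload_file_obj()</span></code> open and release the underlying HDF5 file object is based on their names rather than on documented behavior.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from src.hdf5_ops import HDF5DataOpsManager

# Hypothetical path; any existing DIMA-generated HDF5 file would do.
dao = HDF5DataOpsManager('path/to/experiment_campaign.h5', mode='r+')

dao.load_file_obj()                    # open the underlying file object (assumed behavior)
root_metadata = dao.get_metadata('/')  # read root-level attributes
dao.unload_file_obj()                  # release the file object when done (assumed behavior)
</pre></div></div>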
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.append_dataset">
<span class="sig-name descname"><span class="pre">append_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.append_dataset"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.append_dataset" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.append_metadata">
<span class="sig-name descname"><span class="pre">append_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.append_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.append_metadata" title="Link to this definition"></a></dt>
<dd><p>Appends metadata attributes to the specified object (obj_name) based on the provided annotation_dict.</p>
<p>This method ensures that the provided metadata attributes do not overwrite any existing ones. If an attribute already exists,
a ValueError is raised. The function supports storing scalar values (int, float, str) and compound values such as dictionaries
that are converted into NumPy structured arrays before being added to the metadata.</p>
<section id="id1">
<h3>Parameters:<a class="headerlink" href="#id1" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><p>A dictionary where the keys represent new attribute names (strings), and the values can be:</p>
<ul class="simple">
<li><p>Scalars: int, float, or str.</p></li>
<li><p>Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays.</p></li>
</ul>
<p>Example of a compound value:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>annotation_dict = {
    "relative_humidity": {
        "value": 65,
        "units": "percentage",
        "range": "[0,100]",
        "definition": "amount of water vapor present ..."
    }
}
</pre></div></div>
</dd>
</dl>
</section>
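<p>A call sketch, continuing the class-level example above (<code class="docutils literal notranslate"><span class="pre">dao</span></code> is an open manager; the object path and attribute names are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Raises ValueError if any of these attributes already exist on the target object.
dao.append_metadata('/instrument_1/dataset_1', {
    'operator': 'jdoe',                                        # scalar attribute
    'relative_humidity': {'value': 65, 'units': 'percentage'}  # compound attribute
})
</pre></div></div>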
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.delete_metadata">
<span class="sig-name descname"><span class="pre">delete_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.delete_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.delete_metadata" title="Link to this definition"></a></dt>
<dd><p>Deletes metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</p>
<section id="id2">
<h3>Parameters:<a class="headerlink" href="#id2" title="Link to this heading"></a></h3>
<dl class="simple">
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><p>Dictionary where keys represent attribute names, and values should be dictionaries containing
{“delete”: True} to mark them for deletion.</p>
</dd>
</dl>
</section>
<section id="example">
<h3>Example:<a class="headerlink" href="#example" title="Link to this heading"></a></h3>
<div class="highlight-default notranslate"><div class="highlight"><pre>annotation_dict = {"attr_to_be_deleted": {"delete": True}}
</pre></div></div>
</section>
<section id="behavior">
<h3>Behavior:<a class="headerlink" href="#behavior" title="Link to this heading"></a></h3>
<ul class="simple">
<li><p>Deletes the specified attributes from the object's metadata if marked for deletion.</p></li>
<li><p>Issues a warning if the attribute is not found or not marked for deletion.</p></li>
</ul>
</section>
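<p>A call sketch, continuing the class-level example (the object path is illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Only attributes marked with {"delete": True} are removed.
dao.delete_metadata('/instrument_1', {'attr_to_be_deleted': {'delete': True}})
</pre></div></div>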
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata">
<span class="sig-name descname"><span class="pre">extract_and_load_dataset_metadata</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.extract_and_load_dataset_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe">
<span class="sig-name descname"><span class="pre">extract_dataset_as_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.extract_dataset_as_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe" title="Link to this definition"></a></dt>
<dd><p>Returns a copy of the dataset content as a pandas DataFrame when possible, otherwise as a NumPy array.</p>
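<p>A call sketch, continuing the class-level example (the dataset path is illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Returns a pandas DataFrame when the dataset's dtype allows it, otherwise a NumPy array.
data = dao.extract_dataset_as_dataframe('/instrument_1/dataset_1')
</pre></div></div>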
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.get_metadata">
<span class="sig-name descname"><span class="pre">get_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_path</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.get_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.get_metadata" title="Link to this definition"></a></dt>
<dd><p>Returns the attributes of the object at path = obj_path. For example,
obj_path = '/' returns the root-level attributes (metadata).</p>
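<p>A call sketch, continuing the class-level example (the group path is illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>root_attrs = dao.get_metadata('/')               # root-level metadata
group_attrs = dao.get_metadata('/instrument_1')  # metadata of a specific group
</pre></div></div>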
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.load_file_obj">
<span class="sig-name descname"><span class="pre">load_file_obj</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.load_file_obj"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.load_file_obj" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column">
<span class="sig-name descname"><span class="pre">reformat_datetime_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">src_format</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">desired_format</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'%Y-%m-%d</span> <span class="pre">%H:%M:%S.%f'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.reformat_datetime_column"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.rename_metadata">
<span class="sig-name descname"><span class="pre">rename_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">renaming_map</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.rename_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.rename_metadata" title="Link to this definition"></a></dt>
<dd><p>Renames metadata attributes of the specified object (obj_name) based on the provided renaming_map.</p>
<section id="id3">
<h3>Parameters:<a class="headerlink" href="#id3" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>renaming_map: dict</dt><dd><p>A dictionary where keys are current attribute names (strings), and values are the new attribute names (strings or byte strings) to rename to.</p>
<dl class="simple">
<dt>renaming_map = {</dt><dd><p>“old_attr_name”: “new_attr_name”,
“old_attr_2”: “new_attr_2”</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
</section>
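<p>A call sketch, continuing the class-level example (the object path and attribute names are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>dao.rename_metadata('/instrument_1', {'old_attr_name': 'new_attr_name',
                                      'old_attr_2': 'new_attr_2'})
</pre></div></div>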
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.unload_file_obj">
<span class="sig-name descname"><span class="pre">unload_file_obj</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.unload_file_obj"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.update_file">
<span class="sig-name descname"><span class="pre">update_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_append_dir</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.update_file"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.update_file" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.update_metadata">
<span class="sig-name descname"><span class="pre">update_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.update_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.update_metadata" title="Link to this definition"></a></dt>
<dd><p>Updates the value of existing metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</p>
<p>Non-existing attributes are ignored; use the append_metadata() method to add them to the metadata.</p>
<section id="id4">
<h3>Parameters:<a class="headerlink" href="#id4" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name<span class="classifier">str</span></dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><p>A dictionary where the keys represent existing attribute names (strings), and the values can be:</p>
<ul class="simple">
<li><p>Scalars: int, float, or str.</p></li>
<li><p>Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays.</p></li>
</ul>
<p>Example of a compound value:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre>annotation_dict = {
    "relative_humidity": {
        "value": 65,
        "units": "percentage",
        "range": "[0,100]",
        "definition": "amount of water vapor present ..."
    }
}
</pre></div></div>
</dd>
</dl>
</section>
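<p>A call sketch, continuing the class-level example (the object path and attribute values are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre># Only existing attributes are updated; unknown keys are ignored
# (use append_metadata() to add new ones).
dao.update_metadata('/instrument_1/dataset_1', {
    'relative_humidity': {'value': 70, 'units': 'percentage'}
})
</pre></div></div>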
</dd></dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.get_groups_at_a_level">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">get_groups_at_a_level</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">File</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#get_groups_at_a_level"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.get_groups_at_a_level" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.get_parent_child_relationships">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">get_parent_child_relationships</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">File</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#get_parent_child_relationships"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.get_parent_child_relationships" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.read_mtable_as_dataframe">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">read_mtable_as_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#read_mtable_as_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.read_mtable_as_dataframe" title="Link to this definition"></a></dt>
<dd><p>Reconstruct a MATLAB Table encoded in a .h5 file as a Pandas DataFrame.</p>
<p>This function reads a .h5 file containing a MATLAB Table and reconstructs it as a Pandas DataFrame.
The input .h5 file contains one group per row of the MATLAB Table. Each group stores the table's
dataset-like variables as Datasets, while categorical and numerical variables are represented as
attributes of the respective group.</p>
<p>To ensure homogeneity of data columns, the DataFrame is constructed column-wise.</p>
<section id="id5">
<h2>Parameters<a class="headerlink" href="#id5" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>filename<span class="classifier">str</span></dt><dd><p>The name of the .h5 file. This may include the file's location and path information.</p>
</dd>
</dl>
</section>
<section id="returns">
<h2>Returns<a class="headerlink" href="#returns" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>pd.DataFrame</dt><dd><p>The MATLAB Table reconstructed as a Pandas DataFrame.</p>
</dd>
</dl>
</section>
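<p>A call sketch (the file name is illustrative; the file must encode a MATLAB Table as described above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from src.hdf5_ops import read_mtable_as_dataframe

df = read_mtable_as_dataframe('path/to/matlab_table.h5')
print(df.head())
</pre></div></div>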
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.serialize_metadata">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">serialize_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_filename_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">folder_depth</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">4</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_format</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'yaml'</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="reference internal" href="../_modules/src/hdf5_ops.html#serialize_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.serialize_metadata" title="Link to this definition"></a></dt>
<dd><p>Serialize metadata from an HDF5 file into YAML or JSON format.</p>
<section id="id6">
<h2>Parameters<a class="headerlink" href="#id6" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>input_filename_path<span class="classifier">str</span></dt><dd><p>The path to the input HDF5 file.</p>
</dd>
<dt>folder_depth<span class="classifier">int, optional</span></dt><dd><p>The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).</p>
</dd>
<dt>output_format<span class="classifier">str, optional</span></dt><dd><p>The format to serialize the output, either yaml or json (default is yaml).</p>
</dd>
</dl>
</section>
<section id="id7">
<h2>Returns<a class="headerlink" href="#id7" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>str</dt><dd><p>The output file path where the serialized metadata is stored (either .yaml or .json).</p>
</dd>
</dl>
</section>
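<p>A call sketch (the input path is illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from src.hdf5_ops import serialize_metadata

# Returns the path of the generated .yaml (or .json) file.
output_path = serialize_metadata('path/to/experiment_campaign.h5',
                                 folder_depth=4,
                                 output_format='yaml')
</pre></div></div>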
</dd></dl>
</section>
<section id="module-src.hdf5_writer">
<span id="hdf5-writer"></span><h1>HDF5 Writer<a class="headerlink" href="#module-src.hdf5_writer" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.create_hdf5_file_from_dataframe">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">create_hdf5_file_from_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ofilename</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_by_funcs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">approach</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extract_attrs_func</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#create_hdf5_file_from_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_writer.create_hdf5_file_from_dataframe" title="Link to this definition"></a></dt>
<dd><p>Creates an HDF5 file with hierarchical groups based on the specified grouping functions or columns.</p>
<section id="id8">
<h2>Parameters:<a class="headerlink" href="#id8" title="Link to this heading"></a></h2>
<blockquote>
<div><p>ofilename (str): Path for the output HDF5 file.
input_data (pd.DataFrame or str): Input data as a DataFrame or a valid file system path.
group_by_funcs (list): List of callables or column names to define hierarchical grouping.
approach (str): Specifies the approach (top-down or bottom-up) for creating the HDF5 file.
extract_attrs_func (callable, optional): Function to extract additional attributes for HDF5 groups.</p>
</div></blockquote>
</section>
<section id="id9">
<h2>Returns:<a class="headerlink" href="#id9" title="Link to this heading"></a></h2>
<blockquote>
<div><p>None</p>
</div></blockquote>
</section>
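<p>A call sketch; the inputs are illustrative, and passing plain column names as grouping criteria and the string 'top-down' for approach are assumptions based on the parameter descriptions above, not documented values:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import pandas as pd
from src.hdf5_writer import create_hdf5_file_from_dataframe

df = pd.DataFrame({'instrument': ['a', 'a', 'b'], 'value': [1.0, 2.0, 3.0]})
create_hdf5_file_from_dataframe('output.h5', df,
                                group_by_funcs=['instrument'],
                                approach='top-down')
</pre></div></div>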
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.create_hdf5_file_from_filesystem_path">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">create_hdf5_file_from_filesystem_path</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_input_directory</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path_to_filenames_dict</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_dir_keywords</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">root_metadata_dict</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">{}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'w'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#create_hdf5_file_from_filesystem_path"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_writer.create_hdf5_file_from_filesystem_path" title="Link to this definition"></a></dt>
<dd><p>Creates an .h5 file named "output_filename" that preserves the directory tree (folder structure)
of a given filesystem path.</p>
<p>Data integration is limited by the file reader, which can only read data from a list of
admissible file formats; this list can, however, be extended. Directories become groups in the resulting HDF5 file,
and files are represented as composite objects consisting of a group, file, and attributes.</p>
<section id="id10">
<h2>Parameters<a class="headerlink" href="#id10" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>output_filename<span class="classifier">str</span></dt><dd><p>Name of the output HDF5 file.</p>
</dd>
<dt>path_to_input_directory<span class="classifier">str</span></dt><dd><p>Path to root directory, specified with forward slashes, e.g., path/to/root.</p>
</dd>
<dt>path_to_filenames_dict<span class="classifier">dict, optional</span></dt><dd><p>A pre-processed dictionary where keys are directory paths in the input directory's tree and values are lists of files.
If provided, path_to_input_directory is ignored.</p>
</dd>
<dt>select_dir_keywords<span class="classifier">list</span></dt><dd><p>List of keywords used to select directory paths: only paths containing
at least one of the keywords are included. When empty, all directory paths are included
in the HDF5 file group hierarchy.</p>
</dd>
<dt>root_metadata_dict<span class="classifier">dict</span></dt><dd><p>Metadata to include at the root level of the HDF5 file.</p>
</dd>
<dt>mode<span class="classifier">str</span></dt><dd><p>'w' creates the file, truncating it if it exists; 'r+' opens it for read/write, in which case the file must already exist. By default, mode = 'w'.</p>
</dd>
</dl>
</section>
<section id="id11">
<h2>Returns<a class="headerlink" href="#id11" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>output_filename<span class="classifier">str</span></dt><dd><p>Path to the created HDF5 file.</p>
</dd>
</dl>
</section>
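<p>A call sketch (paths, keywords, and root metadata are illustrative):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>from src.hdf5_writer import create_hdf5_file_from_filesystem_path

hdf5_path = create_hdf5_file_from_filesystem_path(
    'path/to/root',
    select_dir_keywords=['instrument_1'],
    root_metadata_dict={'project': 'experiment_campaign'},
    mode='w',
)
</pre></div></div>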
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.save_processed_dataframe_to_hdf5">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">save_processed_dataframe_to_hdf5</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotator</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#save_processed_dataframe_to_hdf5"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_writer.save_processed_dataframe_to_hdf5" title="Link to this definition"></a></dt>
<dd><p>Save processed dataframe columns with annotations to an HDF5 file.</p>
<dl class="simple">
<dt>Parameters:</dt><dd><p>df (pd.DataFrame): DataFrame containing processed time series.
annotator: Annotator object exposing a get_metadata() method.
output_filename (str): Path to the source HDF5 file.</p>
</dd>
</dl>
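<p>A call sketch; the DataFrame, the annotator stand-in, and the output path are illustrative, and the assumption is that any object exposing a get_metadata() method satisfies the annotator requirement:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>import pandas as pd
from src.hdf5_writer import save_processed_dataframe_to_hdf5

class MinimalAnnotator:          # hypothetical stand-in for a DIMA annotator
    def get_metadata(self):
        return {'processing_step': 'example'}

df = pd.DataFrame({'datetime': pd.date_range('2024-01-01', periods=3, freq='h'),
                   'value': [1.0, 2.0, 3.0]})
save_processed_dataframe_to_hdf5(df, MinimalAnnotator(), 'path/to/experiment_campaign.h5')
</pre></div></div>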
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="../index.html" class="btn btn-neutral float-left" title="Welcome to DIMAs documentation!" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="pipelines.html" class="btn btn-neutral float-right" title="Pipelines and workflows" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>