Add draft of dima documentation

This commit is contained in:
2024-11-26 13:40:43 +01:00
parent 11ca454b94
commit f4f27b7084
88 changed files with 12382 additions and 0 deletions

20
docs/Makefile Normal file
View File

@@ -0,0 +1,20 @@
# Sphinx documentation driver.
#
# Every variable below can be overridden on the command line
# (for example `make help SPHINXOPTS=-W`). SPHINXOPTS and SPHINXBUILD use
# `?=`, so a value already present in the environment is honoured as well.
SPHINXOPTS  ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR   := source
BUILDDIR    := build

# Neither name is a build product. Declaring `Makefile` phony exempts it
# from implicit-rule search, so the catch-all rule below is never applied
# to the makefile itself.
.PHONY: help Makefile

# Keep this the first ordinary rule so that a bare `make` behaves like
# `make help`.
help:
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Fallback for every other goal: pass the goal name straight through to
# sphinx-build's "make mode" (-M). $(O) is meant as a short alias for
# $(SPHINXOPTS) when supplying extra options.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

BIN
docs/build/doctrees/environment.pickle vendored Normal file

Binary file not shown.

BIN
docs/build/doctrees/index.doctree vendored Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
docs/build/doctrees/modules/src.doctree vendored Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

4
docs/build/html/.buildinfo vendored Normal file
View File

@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 19124d911d3357bfa2d9f6131966b4c9
tags: 645f666f9bcd5a90fca523b33c5a78b7

114
docs/build/html/_modules/index.html vendored Normal file
View File

@@ -0,0 +1,114 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Overview: module code</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>All modules for which code is available</h1>
<ul><li><a href="pipelines/data_integration.html">pipelines.data_integration</a></li>
<li><a href="pipelines/metadata_revision.html">pipelines.metadata_revision</a></li>
<li><a href="src/hdf5_ops.html">src.hdf5_ops</a></li>
<li><a href="src/hdf5_vis.html">src.hdf5_vis</a></li>
<li><a href="src/hdf5_writer.html">src.hdf5_writer</a></li>
<li><a href="utils/g5505_utils.html">utils.g5505_utils</a></li>
</ul>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,359 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pipelines.data_integration &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">pipelines.data_integration</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for pipelines.data_integration</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error: __file__ is not available. Ensure the script is being run from a file.&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;[Notice] Path to DIMA package may not be resolved properly.&quot;</span><span class="p">)</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span> <span class="c1"># Use current directory or specify a default</span>
<span class="n">dimaPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">thisFilePath</span><span class="p">,</span> <span class="s2">&quot;..&quot;</span><span class="p">,</span><span class="s1">&#39;..&#39;</span><span class="p">))</span> <span class="c1"># Move up to project root</span>
<span class="k">if</span> <span class="n">dimaPath</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="p">:</span> <span class="c1"># Avoid duplicate entries</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dimaPath</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
<span class="c1"># Importing chain class from itertools </span>
<span class="kn">from</span> <span class="nn">itertools</span> <span class="kn">import</span> <span class="n">chain</span>
<span class="c1"># Import DIMA modules</span>
<span class="kn">import</span> <span class="nn">src.hdf5_writer</span> <span class="k">as</span> <span class="nn">hdf5_lib</span>
<span class="kn">import</span> <span class="nn">utils.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">from</span> <span class="nn">instruments.readers</span> <span class="kn">import</span> <span class="n">filereader_registry</span>
<span class="n">allowed_file_extensions</span> <span class="o">=</span> <span class="n">filereader_registry</span><span class="o">.</span><span class="n">file_extensions</span>
<span class="k">def</span> <span class="nf">_generate_datetime_dict</span><span class="p">(</span><span class="n">datetime_steps</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Generate the datetime augment dictionary from datetime steps. &quot;&quot;&quot;</span>
<span class="n">datetime_augment_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">datetime_step</span> <span class="ow">in</span> <span class="n">datetime_steps</span><span class="p">:</span>
<span class="c1">#tmp = datetime.strptime(datetime_step, &#39;%Y-%m-%d %H-%M-%S&#39;)</span>
<span class="n">datetime_augment_dict</span><span class="p">[</span><span class="n">datetime_step</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span> <span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y_%m_</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span> <span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y.%m.</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span> <span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y%m</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">return</span> <span class="n">datetime_augment_dict</span>
<div class="viewcode-block" id="load_config_and_setup_logging">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.data_integration.load_config_and_setup_logging">[docs]</a>
<span class="k">def</span> <span class="nf">load_config_and_setup_logging</span><span class="p">(</span><span class="n">yaml_config_file_path</span><span class="p">,</span> <span class="n">log_dir</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load YAML configuration file, set up logging, and validate required keys and datetime_steps.&quot;&quot;&quot;</span>
<span class="c1"># Define required keys</span>
<span class="n">required_keys</span> <span class="o">=</span> <span class="p">[</span>
<span class="s1">&#39;experiment&#39;</span><span class="p">,</span> <span class="s1">&#39;contact&#39;</span><span class="p">,</span> <span class="s1">&#39;input_file_directory&#39;</span><span class="p">,</span> <span class="s1">&#39;output_file_directory&#39;</span><span class="p">,</span>
<span class="s1">&#39;instrument_datafolder&#39;</span><span class="p">,</span> <span class="s1">&#39;project&#39;</span><span class="p">,</span> <span class="s1">&#39;actris_level&#39;</span>
<span class="p">]</span>
<span class="c1"># Supported integration modes</span>
<span class="n">supported_integration_modes</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;collection&#39;</span><span class="p">,</span> <span class="s1">&#39;single_experiment&#39;</span><span class="p">]</span>
<span class="c1"># Set up logging</span>
<span class="n">date</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">(</span><span class="s2">&quot;%Y_%m&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;:&quot;</span><span class="p">,</span> <span class="s2">&quot;-&quot;</span><span class="p">)</span>
<span class="n">utils</span><span class="o">.</span><span class="n">setup_logging</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;integrate_data_sources_</span><span class="si">{</span><span class="n">date</span><span class="si">}</span><span class="s2">.log&quot;</span><span class="p">)</span>
<span class="c1"># Load YAML configuration file</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">yaml_config_file_path</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">config_dict</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
<span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">&quot;Error loading YAML file: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">exc</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to load YAML file: </span><span class="si">{</span><span class="n">exc</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="c1"># Check if required keys are present</span>
<span class="n">missing_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">required_keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">]</span>
<span class="k">if</span> <span class="n">missing_keys</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Missing required keys in YAML configuration: </span><span class="si">{</span><span class="n">missing_keys</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="c1"># Validate integration_mode</span>
<span class="n">integration_mode</span> <span class="o">=</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;integration_mode&#39;</span><span class="p">,</span> <span class="s1">&#39;N/A&#39;</span><span class="p">)</span> <span class="c1"># Falls back to &#39;N/A&#39;, which the check below rejects</span>
<span class="k">if</span> <span class="n">integration_mode</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">supported_integration_modes</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Unsupported integration_mode &#39;</span><span class="si">{</span><span class="n">integration_mode</span><span class="si">}</span><span class="s2">&#39;. Supported modes are </span><span class="si">{</span><span class="n">supported_integration_modes</span><span class="si">}</span><span class="s2">. Setting &#39;</span><span class="si">{</span><span class="n">integration_mode</span><span class="si">}</span><span class="s2">&#39; to &#39;single_experiment&#39;.&quot;</span>
<span class="p">)</span>
<span class="c1"># Validate datetime_steps format if it exists</span>
<span class="k">if</span> <span class="s1">&#39;datetime_steps&#39;</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">:</span>
<span class="n">datetime_steps</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">]</span>
<span class="n">expected_format</span> <span class="o">=</span> <span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H-%M-%S&#39;</span>
<span class="c1"># Check if datetime_steps is a list or a falsy value</span>
<span class="k">if</span> <span class="n">datetime_steps</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">datetime_steps</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;datetime_steps should be a list of strings or a falsy value (None, empty), but got </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">datetime_steps</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">step_idx</span><span class="p">,</span> <span class="n">step</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">datetime_steps</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Attempt to parse the datetime to ensure correct format</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">][</span><span class="n">step_idx</span><span class="p">]</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">step</span><span class="p">,</span> <span class="n">expected_format</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid datetime format for &#39;</span><span class="si">{</span><span class="n">step</span><span class="si">}</span><span class="s2">&#39;. Expected format: </span><span class="si">{</span><span class="n">expected_format</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="c1"># Augment the datetime_steps list into a dictionary to speed up single-experiment file generation</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps_dict&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">_generate_datetime_dict</span><span class="p">(</span><span class="n">datetime_steps</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># If datetime_steps is not present, set the integration mode to &#39;collection&#39;</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;datetime_steps missing, setting integration_mode to &#39;collection&#39;.&quot;</span><span class="p">)</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;integration_mode&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;collection&#39;</span>
<span class="c1"># Validate filename_format if defined</span>
<span class="k">if</span> <span class="s1">&#39;filename_format&#39;</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;&quot;Specified filename_format needs to be of String type&quot; &#39;</span><span class="p">)</span>
<span class="c1"># Split the string and check if each key exists in config_dict</span>
<span class="n">keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">)]</span>
<span class="n">missing_keys</span> <span class="o">=</span> <span class="p">[</span><span class="n">key</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">keys</span> <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">]</span>
<span class="c1"># If any key is missing, filename_format is reset to None below (the assertion is disabled)</span>
<span class="c1"># assert not missing_keys, f&#39;Missing key(s) in config_dict: {&quot;, &quot;.join(missing_keys)}&#39;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">missing_keys</span><span class="p">:</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;,&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">keys</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;&quot;filename_format&quot; should contain comma-separated keys that match existing keys in the YAML config file.&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Setting &quot;filename_format&quot; as None&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Compute complementary metadata elements</span>
<span class="c1"># Create output filename prefix</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]:</span> <span class="c1"># default behavior</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_prefix&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;experiment&#39;</span><span class="p">,</span> <span class="s1">&#39;contact&#39;</span><span class="p">]])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_prefix&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_format&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">sep</span><span class="o">=</span><span class="s1">&#39;,&#39;</span><span class="p">)])</span>
<span class="c1"># Set default dates from datetime_steps if not provided</span>
<span class="n">current_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">dates</span> <span class="o">=</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">,[])</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">):</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span> <span class="k">if</span> <span class="n">dates</span> <span class="k">else</span> <span class="n">current_date</span> <span class="c1"># Earliest datetime step</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">):</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span> <span class="k">if</span> <span class="n">dates</span> <span class="k">else</span> <span class="n">current_date</span> <span class="c1"># Latest datetime step</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;expected_datetime_format&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H-%M-%S&#39;</span>
<span class="k">return</span> <span class="n">config_dict</span></div>
<div class="viewcode-block" id="copy_subtree_and_create_hdf5">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.data_integration.copy_subtree_and_create_hdf5">[docs]</a>
<span class="k">def</span> <span class="nf">copy_subtree_and_create_hdf5</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">dst</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span> <span class="n">allowed_file_extensions</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;Helper function to copy directory with constraints and create HDF5.</span>

<span class="sd">    Both paths are normalized to forward slashes. The copy is delegated to</span>
<span class="sd">    utils.copy_directory_with_contraints and the HDF5 file is then built from</span>
<span class="sd">    dst by hdf5_lib.create_hdf5_file_from_filesystem_path.</span>

<span class="sd">    Returns:</span>
<span class="sd">        The value returned by hdf5_lib.create_hdf5_file_from_filesystem_path</span>
<span class="sd">        (logged as the HDF5 file path).</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">src</span> <span class="o">=</span> <span class="n">src</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
    <span class="n">dst</span> <span class="o">=</span> <span class="n">dst</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
    <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Creating constrained copy of the experimental campaign folder </span><span class="si">%s</span><span class="s2"> at: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">src</span><span class="p">,</span> <span class="n">dst</span><span class="p">)</span>
    <span class="n">path_to_files_dict</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">copy_directory_with_contraints</span><span class="p">(</span><span class="n">src</span><span class="p">,</span> <span class="n">dst</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span> <span class="n">allowed_file_extensions</span><span class="p">)</span>
    <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Finished creating a copy of the experimental campaign folder tree at: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">dst</span><span class="p">)</span>
    <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Creating HDF5 file at: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">dst</span><span class="p">)</span>
    <span class="n">hdf5_path</span> <span class="o">=</span> <span class="n">hdf5_lib</span><span class="o">.</span><span class="n">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">dst</span><span class="p">,</span> <span class="n">path_to_files_dict</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
    <span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Completed creation of HDF5 file </span><span class="si">%s</span><span class="s2"> at: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">hdf5_path</span><span class="p">,</span> <span class="n">dst</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">hdf5_path</span></div>
<div class="viewcode-block" id="run_pipeline">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.data_integration.run_pipeline">[docs]</a>
<span class="k">def</span> <span class="nf">run_pipeline</span><span class="p">(</span><span class="n">path_to_config_yamlFile</span><span class="p">,</span> <span class="n">log_dir</span><span class="o">=</span><span class="s1">&#39;logs/&#39;</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;Integrates data sources specified by the input configuration file into HDF5 files.</span>

<span class="sd">    Parameters:</span>
<span class="sd">        path_to_config_yamlFile (str): Path to the YAML configuration file.</span>
<span class="sd">        log_dir (str): Directory to save the log file.</span>

<span class="sd">    Returns:</span>
<span class="sd">        list: List of paths to the created HDF5 file(s).</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">config_dict</span> <span class="o">=</span> <span class="n">load_config_and_setup_logging</span><span class="p">(</span><span class="n">path_to_config_yamlFile</span><span class="p">,</span> <span class="n">log_dir</span><span class="p">)</span>
    <span class="n">path_to_input_dir</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;input_file_directory&#39;</span><span class="p">]</span>
    <span class="n">path_to_output_dir</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;output_file_directory&#39;</span><span class="p">]</span>
    <span class="n">select_dir_keywords</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;instrument_datafolder&#39;</span><span class="p">]</span>
    <span class="c1"># Define root folder metadata dictionary</span>
    <span class="n">root_metadata_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span> <span class="p">:</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;project&#39;</span><span class="p">,</span> <span class="s1">&#39;experiment&#39;</span><span class="p">,</span> <span class="s1">&#39;contact&#39;</span><span class="p">,</span> <span class="s1">&#39;actris_level&#39;</span><span class="p">]}</span>
    <span class="c1"># Get dataset start and end dates</span>
    <span class="n">dataset_startdate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">]</span>
    <span class="n">dataset_enddate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">]</span>
    <span class="c1"># Determine mode and process accordingly</span>
    <span class="n">output_filename_path</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">campaign_name_template</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">filename_prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">:</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">filename_prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">])</span>
    <span class="n">date_str</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">dataset_startdate</span><span class="si">}</span><span class="s1">_</span><span class="si">{</span><span class="n">dataset_enddate</span><span class="si">}</span><span class="s1">&#39;</span>
    <span class="c1"># Create path to new raw datafolder and standardize with forward slashes</span>
    <span class="n">path_to_rawdata_folder</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
        <span class="n">path_to_output_dir</span><span class="p">,</span> <span class="s1">&#39;collection_&#39;</span> <span class="o">+</span> <span class="n">campaign_name_template</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_prefix&#39;</span><span class="p">],</span> <span class="n">date_str</span><span class="p">),</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">)</span>
    <span class="c1"># Process individual datetime steps if available, regardless of mode </span>
    <span class="k">if</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;datetime_steps_dict&#39;</span><span class="p">,</span> <span class="p">{}):</span>
        <span class="c1"># Single experiment mode</span>
        <span class="k">for</span> <span class="n">datetime_step</span><span class="p">,</span> <span class="n">file_keywords</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps_dict&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
            <span class="n">date_str</span> <span class="o">=</span> <span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
            <span class="n">single_campaign_name</span> <span class="o">=</span> <span class="n">campaign_name_template</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;filename_prefix&#39;</span><span class="p">],</span> <span class="n">date_str</span><span class="p">)</span>
            <span class="n">path_to_rawdata_subfolder</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path_to_rawdata_folder</span><span class="p">,</span> <span class="n">single_campaign_name</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
            <span class="n">path_to_integrated_stepwise_hdf5_file</span> <span class="o">=</span> <span class="n">copy_subtree_and_create_hdf5</span><span class="p">(</span>
                <span class="n">path_to_input_dir</span><span class="p">,</span> <span class="n">path_to_rawdata_subfolder</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span>
                <span class="n">file_keywords</span><span class="p">,</span> <span class="n">allowed_file_extensions</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
            <span class="n">output_filename_path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">path_to_integrated_stepwise_hdf5_file</span><span class="p">)</span>
        <span class="c1"># Collection mode processing if specified</span>
        <span class="k">if</span> <span class="s1">&#39;collection&#39;</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;integration_mode&#39;</span><span class="p">,</span> <span class="s1">&#39;single_experiment&#39;</span><span class="p">):</span>
            <span class="n">path_to_filenames_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">path_to_rawdata_folder</span><span class="p">:</span> <span class="p">[</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">basename</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="k">for</span> <span class="n">path</span> <span class="ow">in</span> <span class="n">output_filename_path</span><span class="p">]}</span> <span class="k">if</span> <span class="n">output_filename_path</span> <span class="k">else</span> <span class="p">{}</span>
            <span class="n">hdf5_path</span> <span class="o">=</span> <span class="n">hdf5_lib</span><span class="o">.</span><span class="n">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">path_to_rawdata_folder</span><span class="p">,</span> <span class="n">path_to_filenames_dict</span><span class="p">,</span> <span class="p">[],</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
            <span class="n">output_filename_path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">hdf5_path</span><span class="p">)</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="n">path_to_integrated_stepwise_hdf5_file</span> <span class="o">=</span> <span class="n">copy_subtree_and_create_hdf5</span><span class="p">(</span>
            <span class="n">path_to_input_dir</span><span class="p">,</span> <span class="n">path_to_rawdata_folder</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span> <span class="p">[],</span>
            <span class="n">allowed_file_extensions</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
        <span class="n">output_filename_path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">path_to_integrated_stepwise_hdf5_file</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">output_filename_path</span></div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
    <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">2</span><span class="p">:</span>
        <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Usage: python data_integration.py &lt;function_name&gt; &lt;function_args&gt;&quot;</span><span class="p">)</span>
        <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
    <span class="c1"># Extract the function name from the command line arguments</span>
    <span class="n">function_name</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
    <span class="c1"># Handle function execution based on the provided function name</span>
    <span class="k">if</span> <span class="n">function_name</span> <span class="o">==</span> <span class="s1">&#39;run&#39;</span><span class="p">:</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">3</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Usage: python data_integration.py run &lt;path_to_config_yamlFile&gt;&quot;</span><span class="p">)</span>
            <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
        <span class="c1"># Extract path to configuration file, specifying the data integration task </span>
        <span class="n">path_to_config_yamlFile</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
        <span class="n">run_pipeline</span><span class="p">(</span><span class="n">path_to_config_yamlFile</span><span class="p">)</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Once the DOM is ready (jQuery ready callback), turn on the theme's navigation.
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,299 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pipelines.metadata_revision &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">pipelines.metadata_revision</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for pipelines.metadata_revision</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="k">try</span><span class="p">:</span>
    <span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error: __file__ is not available. Ensure the script is being run from a file.&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;[Notice] Path to DIMA package may not be resolved properly.&quot;</span><span class="p">)</span>
    <span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span> <span class="c1"># Use current directory or specify a default</span>
<span class="n">dimaPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">thisFilePath</span><span class="p">,</span> <span class="s2">&quot;..&quot;</span><span class="p">,</span><span class="s1">&#39;..&#39;</span><span class="p">))</span> <span class="c1"># Move up to project root</span>
<span class="k">if</span> <span class="n">dimaPath</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="p">:</span> <span class="c1"># Avoid duplicate entries</span>
    <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dimaPath</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">src.hdf5_ops</span> <span class="k">as</span> <span class="nn">hdf5_ops</span>
<div class="viewcode-block" id="load_yaml">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.metadata_revision.load_yaml">[docs]</a>
<span class="k">def</span> <span class="nf">load_yaml</span><span class="p">(</span><span class="n">review_yaml_file</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;Load a YAML file and return its parsed content.</span>

<span class="sd">    Parameters:</span>
<span class="sd">        review_yaml_file (str): Path to the YAML file to read.</span>

<span class="sd">    Returns:</span>
<span class="sd">        The parsed YAML content, or None if parsing raises yaml.YAMLError</span>
<span class="sd">        (the exception is printed).</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">review_yaml_file</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
        <span class="k">try</span><span class="p">:</span>
            <span class="k">return</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
        <span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
            <span class="nb">print</span><span class="p">(</span><span class="n">exc</span><span class="p">)</span>
            <span class="k">return</span> <span class="kc">None</span></div>
<div class="viewcode-block" id="validate_yaml_dict">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.metadata_revision.validate_yaml_dict">[docs]</a>
<span class="k">def</span> <span class="nf">validate_yaml_dict</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;Check a metadata review dictionary against the objects of an HDF5 file.</span>

<span class="sd">    Parameters:</span>
<span class="sd">        input_hdf5_file (str): Path to the HDF5 file, opened read-only.</span>
<span class="sd">        yaml_dict (dict): Maps HDF5 object names to their metadata dictionaries.</span>

<span class="sd">    Returns:</span>
<span class="sd">        tuple: (is_valid, errors, notes), where is_valid is True when no errors were found.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">errors</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">notes</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">hdf5_file</span><span class="p">:</span>
        <span class="c1"># 1. Check for valid object names</span>
        <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">yaml_dict</span><span class="p">:</span>
            <span class="k">if</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">hdf5_file</span><span class="p">:</span>
                <span class="n">error_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Error: </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> is not a valid object&#39;s name in the HDF5 file.&quot;</span>
                <span class="nb">print</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
                <span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
        <span class="c1"># 2. Confirm metadata dict for each object is a dictionary</span>
        <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">meta_dict</span> <span class="ow">in</span> <span class="n">yaml_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
            <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">meta_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
                <span class="n">error_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Error: Metadata for </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> should be a dictionary.&quot;</span>
                <span class="nb">print</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
                <span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
            <span class="k">else</span><span class="p">:</span>
                <span class="k">if</span> <span class="s1">&#39;attributes&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">meta_dict</span><span class="p">:</span>
                    <span class="n">warning_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Warning: No &#39;attributes&#39; in metadata dict for </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">.&quot;</span>
                    <span class="nb">print</span><span class="p">(</span><span class="n">warning_msg</span><span class="p">)</span>
                    <span class="n">notes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">warning_msg</span><span class="p">)</span>
        <span class="c1"># 3. Verify update, append, and delete operations are well specified </span>
        <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">meta_dict</span> <span class="ow">in</span> <span class="n">yaml_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
            <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">meta_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
                <span class="k">continue</span> <span class="c1"># Already reported in step 2; skip to avoid AttributeError on .get()</span>
            <span class="n">attributes</span> <span class="o">=</span> <span class="n">meta_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;attributes&quot;</span><span class="p">,</span> <span class="p">{})</span>
            <span class="k">for</span> <span class="n">attr_name</span><span class="p">,</span> <span class="n">attr_value</span> <span class="ow">in</span> <span class="n">attributes</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
                <span class="c1"># Ensure the object exists before accessing attributes</span>
                <span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">hdf5_file</span><span class="p">:</span>
                    <span class="n">hdf5_obj_attrs</span> <span class="o">=</span> <span class="n">hdf5_file</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span> <span class="c1"># Access object-specific attributes</span>
                    <span class="k">if</span> <span class="n">attr_name</span> <span class="ow">in</span> <span class="n">hdf5_obj_attrs</span><span class="p">:</span>
                        <span class="c1"># Attribute exists: it can be updated or deleted</span>
                        <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">attr_value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="s2">&quot;delete&quot;</span> <span class="ow">in</span> <span class="n">attr_value</span><span class="p">:</span>
                            <span class="n">note_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Note: &#39;</span><span class="si">{</span><span class="n">attr_name</span><span class="si">}</span><span class="s2">&#39; in </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> may be deleted if &#39;delete&#39; is set as true.&quot;</span>
                            <span class="nb">print</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                            <span class="n">notes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                        <span class="k">else</span><span class="p">:</span>
                            <span class="n">note_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Note: &#39;</span><span class="si">{</span><span class="n">attr_name</span><span class="si">}</span><span class="s2">&#39; in </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> will be updated.&quot;</span>
                            <span class="nb">print</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                            <span class="n">notes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                    <span class="k">else</span><span class="p">:</span>
                        <span class="c1"># Attribute does not exist: it can be appended or flagged as an invalid delete</span>
                        <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">attr_value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="s2">&quot;delete&quot;</span> <span class="ow">in</span> <span class="n">attr_value</span><span class="p">:</span>
                            <span class="n">error_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Error: Cannot delete non-existent attribute &#39;</span><span class="si">{</span><span class="n">attr_name</span><span class="si">}</span><span class="s2">&#39; in </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">.&quot;</span>
                            <span class="nb">print</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
                            <span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
                        <span class="k">else</span><span class="p">:</span>
                            <span class="n">note_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Note: &#39;</span><span class="si">{</span><span class="n">attr_name</span><span class="si">}</span><span class="s2">&#39; in </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2"> will be appended.&quot;</span>
                            <span class="nb">print</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                            <span class="n">notes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">note_msg</span><span class="p">)</span>
                <span class="k">else</span><span class="p">:</span>
                    <span class="n">error_msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Error: &#39;</span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">&#39; is not a valid object in the HDF5 file.&quot;</span>
                    <span class="nb">print</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
                    <span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">error_msg</span><span class="p">)</span>
    <span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="n">errors</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="n">errors</span><span class="p">,</span> <span class="n">notes</span></div>
<div class="viewcode-block" id="update_hdf5_file_with_review">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.metadata_revision.update_hdf5_file_with_review">[docs]</a>
<span class="k">def</span> <span class="nf">update_hdf5_file_with_review</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">review_yaml_file</span><span class="p">):</span>
<span class="w"> </span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Updates, appends, or deletes metadata attributes in an HDF5 file based on a provided YAML dictionary.</span>
<span class="sd"> </span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> input_hdf5_file : str</span>
<span class="sd"> Path to the HDF5 file.</span>
<span class="sd"> </span>
<span class="sd"> yaml_dict : dict</span>
<span class="sd"> Dictionary specifying objects and their attributes with operations. Example format:</span>
<span class="sd"> {</span>
<span class="sd"> &quot;object_name&quot;: { &quot;attributes&quot; : &quot;attr_name&quot;: { &quot;value&quot;: attr_value,</span>
<span class="sd"> &quot;delete&quot;: true | false</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">yaml_dict</span> <span class="o">=</span> <span class="n">load_yaml</span><span class="p">(</span><span class="n">review_yaml_file</span><span class="p">)</span>
<span class="n">success</span><span class="p">,</span> <span class="n">errors</span><span class="p">,</span> <span class="n">notes</span> <span class="o">=</span> <span class="n">validate_yaml_dict</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span><span class="n">yaml_dict</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">success</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Review yaml file </span><span class="si">{</span><span class="n">review_yaml_file</span><span class="si">}</span><span class="s2"> is invalid. Validation errors: </span><span class="si">{</span><span class="n">errors</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="c1"># Initialize HDF5 operations manager</span>
<span class="n">DataOpsAPI</span> <span class="o">=</span> <span class="n">hdf5_ops</span><span class="o">.</span><span class="n">HDF5DataOpsManager</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">)</span>
<span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">load_file_obj</span><span class="p">()</span>
<span class="c1"># Iterate over each object in the YAML dictionary</span>
<span class="k">for</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">attr_dict</span> <span class="ow">in</span> <span class="n">yaml_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="c1"># Prepare dictionaries for append, update, and delete actions</span>
<span class="n">append_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">update_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">delete_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">obj_name</span> <span class="ow">in</span> <span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">file_obj</span><span class="p">:</span>
<span class="k">continue</span> <span class="c1"># Skip if the object does not exist</span>
<span class="c1"># Iterate over each attribute in the current object</span>
<span class="k">for</span> <span class="n">attr_name</span><span class="p">,</span> <span class="n">attr_props</span> <span class="ow">in</span> <span class="n">attr_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">attr_props</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="c1">#attr_props = {&#39;value&#39;: attr_props}</span>
<span class="c1"># Check if the attribute exists (for updating)</span>
<span class="k">if</span> <span class="n">attr_name</span> <span class="ow">in</span> <span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="n">update_dict</span><span class="p">[</span><span class="n">attr_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">attr_props</span>
<span class="c1"># Otherwise, it&#39;s a new attribute to append</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">append_dict</span><span class="p">[</span><span class="n">attr_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">attr_props</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Check if the attribute is marked for deletion</span>
<span class="k">if</span> <span class="n">attr_props</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;delete&#39;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="n">delete_dict</span><span class="p">[</span><span class="n">attr_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">attr_props</span>
<span class="c1"># Perform a single pass for all three operations</span>
<span class="k">if</span> <span class="n">append_dict</span><span class="p">:</span>
<span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">append_metadata</span><span class="p">(</span><span class="n">obj_name</span><span class="p">,</span> <span class="n">append_dict</span><span class="p">)</span>
<span class="k">if</span> <span class="n">update_dict</span><span class="p">:</span>
<span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">update_metadata</span><span class="p">(</span><span class="n">obj_name</span><span class="p">,</span> <span class="n">update_dict</span><span class="p">)</span>
<span class="k">if</span> <span class="n">delete_dict</span><span class="p">:</span>
<span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">delete_metadata</span><span class="p">(</span><span class="n">obj_name</span><span class="p">,</span> <span class="n">delete_dict</span><span class="p">)</span>
<span class="c1"># Close hdf5 file</span>
<span class="n">DataOpsAPI</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="c1"># Regenerate yaml snapshot of updated HDF5 file</span>
<span class="n">output_yml_filename_path</span> <span class="o">=</span> <span class="n">hdf5_ops</span><span class="o">.</span><span class="n">serialize_metadata</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">output_yml_filename_path</span><span class="si">}</span><span class="s1"> was successfully regenerated from the updated version of</span><span class="si">{</span><span class="n">input_hdf5_file</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="count">
<a class="viewcode-back" href="../../modules/pipelines.html#pipelines.metadata_revision.count">[docs]</a>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span><span class="n">yml_dict</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Group</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="mi">4</span><span class="p">:</span>
<span class="n">obj_review</span> <span class="o">=</span> <span class="n">yml_dict</span><span class="p">[</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="n">additions</span> <span class="o">=</span> <span class="p">[</span><span class="ow">not</span> <span class="p">(</span><span class="n">item</span> <span class="ow">in</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">obj_review</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">()]</span>
<span class="n">count_additions</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">additions</span><span class="p">)</span>
<span class="n">deletions</span> <span class="o">=</span> <span class="p">[</span><span class="ow">not</span> <span class="p">(</span><span class="n">item</span> <span class="ow">in</span> <span class="n">obj_review</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">()]</span>
<span class="n">count_delections</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">deletions</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;additions&#39;</span><span class="p">,</span><span class="n">count_additions</span><span class="p">,</span> <span class="s1">&#39;deletions&#39;</span><span class="p">,</span> <span class="n">count_delections</span><span class="p">)</span></div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">4</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Usage: python metadata_revision.py update &lt;path/to/target_file.hdf5&gt; &lt;path/to/metadata_review_file.yaml&gt;&quot;</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;update&#39;</span><span class="p">:</span>
<span class="n">input_hdf5_file</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
<span class="n">review_yaml_file</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span>
<span class="n">update_hdf5_file_with_review</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">review_yaml_file</span><span class="p">)</span>
<span class="c1">#run(sys.argv[2])</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,210 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.data_integration_lib &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.metadata_review_lib">Data annotation and review</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.g5505_file_reader">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.data_integration_lib</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.data_integration_lib</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">src.hdf5_lib</span> <span class="k">as</span> <span class="nn">hdf5_lib</span>
<span class="kn">import</span> <span class="nn">src.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
<div class="viewcode-block" id="integrate_data_sources">
<a class="viewcode-back" href="../../modules/src.html#src.data_integration_lib.integrate_data_sources">[docs]</a>
<span class="k">def</span> <span class="nf">integrate_data_sources</span><span class="p">(</span><span class="n">yaml_config_file_path</span><span class="p">,</span> <span class="n">log_dir</span><span class="o">=</span><span class="s1">&#39;logs/&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Integrates data sources specified by the input configuration file into HDF5 files.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> yaml_config_file_path (str): Path to the YAML configuration file.</span>
<span class="sd"> log_dir (str): Directory to save the log file.</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: Path (or list of Paths) to the created HDF5 file(s).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">date</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span>
<span class="n">utils</span><span class="o">.</span><span class="n">setup_logging</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;integrate_data_sources_</span><span class="si">{</span><span class="n">date</span><span class="si">}</span><span class="s2">.log&quot;</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">yaml_config_file_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">config_dict</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
<span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">&quot;Error loading YAML file: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">exc</span><span class="p">)</span>
<span class="k">raise</span>
<span class="k">def</span> <span class="nf">output_filename</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">date</span><span class="p">,</span> <span class="n">initials</span><span class="p">):</span>
<span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">date</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">initials</span><span class="si">}</span><span class="s2">.h5&quot;</span>
<span class="n">exp_campaign_name</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;experiment&#39;</span><span class="p">]</span>
<span class="n">initials</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;contact&#39;</span><span class="p">]</span>
<span class="n">input_file_dir</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;input_file_directory&#39;</span><span class="p">]</span>
<span class="n">output_dir</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;output_file_directory&#39;</span><span class="p">]</span>
<span class="n">select_dir_keywords</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;instrument_datafolder&#39;</span><span class="p">]</span>
<span class="n">root_metadata_dict</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;project&#39;</span> <span class="p">:</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;project&#39;</span><span class="p">],</span>
<span class="s1">&#39;experiment&#39;</span> <span class="p">:</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;experiment&#39;</span><span class="p">],</span>
<span class="s1">&#39;contact&#39;</span> <span class="p">:</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;contact&#39;</span><span class="p">],</span>
<span class="s1">&#39;actris_level&#39;</span><span class="p">:</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;actris_level&#39;</span><span class="p">]</span>
<span class="p">}</span>
<span class="k">def</span> <span class="nf">create_hdf5_file</span><span class="p">(</span><span class="n">date_str</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span><span class="n">root_metadata</span><span class="p">):</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">output_filename</span><span class="p">(</span><span class="n">exp_campaign_name</span><span class="p">,</span> <span class="n">date_str</span><span class="p">,</span> <span class="n">initials</span><span class="p">)</span>
<span class="n">output_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Creating HDF5 file at: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">output_path</span><span class="p">)</span>
<span class="k">return</span> <span class="n">hdf5_lib</span><span class="o">.</span><span class="n">create_hdf5_file_from_filesystem_path</span><span class="p">(</span>
<span class="n">output_path</span><span class="p">,</span> <span class="n">input_file_dir</span><span class="p">,</span> <span class="n">select_dir_keywords</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="o">=</span><span class="n">root_metadata</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">):</span>
<span class="n">datetime_augment_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">datetime_step</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;datetime_steps&#39;</span><span class="p">]:</span>
<span class="n">tmp</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">datetime_step</span><span class="p">,</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H-%M-%S&#39;</span><span class="p">)</span> <span class="c1">#convert(datetime_step)</span>
<span class="n">datetime_augment_dict</span><span class="p">[</span><span class="n">tmp</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">tmp</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span><span class="n">tmp</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y_%m_</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span><span class="n">tmp</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y.%m.</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">),</span><span class="n">tmp</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y%m</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)]</span>
<span class="nb">print</span><span class="p">(</span><span class="n">tmp</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;single_experiment&#39;</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;integration_mode&#39;</span><span class="p">]:</span>
<span class="n">output_filename_path</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">datetime_step</span> <span class="ow">in</span> <span class="n">datetime_augment_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">date_str</span> <span class="o">=</span> <span class="n">datetime_step</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">select_file_keywords</span> <span class="o">=</span> <span class="n">datetime_augment_dict</span><span class="p">[</span><span class="n">datetime_step</span><span class="p">]</span>
<span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">:</span> <span class="n">date_str</span><span class="p">,</span>
<span class="s1">&#39;dataset_enddate&#39;</span><span class="p">:</span> <span class="n">date_str</span><span class="p">})</span>
<span class="n">dt_step_output_filename_path</span><span class="o">=</span> <span class="n">create_hdf5_file</span><span class="p">(</span><span class="n">date_str</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
<span class="n">output_filename_path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dt_step_output_filename_path</span><span class="p">)</span>
<span class="k">elif</span> <span class="s1">&#39;collection&#39;</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;integration_mode&#39;</span><span class="p">]:</span>
<span class="n">select_file_keywords</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">datetime_step</span> <span class="ow">in</span> <span class="n">datetime_augment_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">select_file_keywords</span> <span class="o">=</span> <span class="n">select_file_keywords</span> <span class="o">+</span> <span class="n">datetime_augment_dict</span><span class="p">[</span><span class="n">datetime_step</span><span class="p">]</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">datetime_augment_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">datetime_augment_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">startdate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">enddate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">:</span> <span class="n">startdate</span><span class="p">,</span>
<span class="s1">&#39;dataset_enddate&#39;</span><span class="p">:</span> <span class="n">enddate</span><span class="p">})</span>
<span class="n">date_str</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">startdate</span><span class="si">}</span><span class="s1">_</span><span class="si">{</span><span class="n">enddate</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">output_filename_path</span> <span class="o">=</span> <span class="n">create_hdf5_file</span><span class="p">(</span><span class="n">date_str</span><span class="p">,</span> <span class="n">select_file_keywords</span><span class="p">,</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">startdate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">]</span>
<span class="n">enddate</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;dataset_enddate&#39;</span><span class="p">]</span>
<span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s1">&#39;dataset_startdate&#39;</span><span class="p">:</span> <span class="n">startdate</span><span class="p">,</span>
<span class="s1">&#39;dataset_enddate&#39;</span><span class="p">:</span> <span class="n">enddate</span><span class="p">})</span>
<span class="n">date_str</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">startdate</span><span class="si">}</span><span class="s1">_</span><span class="si">{</span><span class="n">enddate</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">output_filename_path</span> <span class="o">=</span> <span class="n">create_hdf5_file</span><span class="p">(</span><span class="n">date_str</span><span class="p">,</span> <span class="n">select_file_keywords</span> <span class="o">=</span> <span class="p">[],</span> <span class="n">root_metadata</span> <span class="o">=</span> <span class="n">root_metadata_dict</span><span class="p">)</span>
<span class="k">return</span> <span class="n">output_filename_path</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,448 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.g5505_file_reader &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.metadata_review_lib">Data annotation and review</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.g5505_file_reader">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.g5505_file_reader</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.g5505_file_reader</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">collections</span>
<span class="kn">from</span> <span class="nn">igor2.binarywave</span> <span class="kn">import</span> <span class="n">load</span> <span class="k">as</span> <span class="n">loadibw</span>
<span class="kn">import</span> <span class="nn">src.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="c1">#import src.metadata_review_lib as metadata</span>
<span class="c1">#from src.metadata_review_lib import parse_attribute</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="n">ROOT_DIR</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<div class="viewcode-block" id="read_xps_ibw_file_as_dict">
<a class="viewcode-back" href="../../modules/src.html#src.g5505_file_reader.read_xps_ibw_file_as_dict">[docs]</a>
<span class="k">def</span> <span class="nf">read_xps_ibw_file_as_dict</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reads IBW files from the Multiphase Chemistry Group, which contain XPS spectra and acquisition settings,</span>
<span class="sd"> and formats the data into a dictionary with the structure {datasets: list of datasets}. Each dataset in the</span>
<span class="sd"> list has the following structure:</span>
<span class="sd"> {</span>
<span class="sd"> &#39;name&#39;: &#39;name&#39;,</span>
<span class="sd"> &#39;data&#39;: data_array,</span>
<span class="sd"> &#39;data_units&#39;: &#39;units&#39;,</span>
<span class="sd"> &#39;shape&#39;: data_shape,</span>
<span class="sd"> &#39;dtype&#39;: data_type</span>
<span class="sd"> }</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> filename : str</span>
<span class="sd"> The IBW filename from the Multiphase Chemistry Group beamline.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> file_dict : dict</span>
<span class="sd"> A dictionary containing the datasets from the IBW file. </span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the input IBW file is not a valid IBW file.</span>
<span class="sd"> </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">file_obj</span> <span class="o">=</span> <span class="n">loadibw</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="n">required_keys</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;wData&#39;</span><span class="p">,</span><span class="s1">&#39;data_units&#39;</span><span class="p">,</span><span class="s1">&#39;dimension_units&#39;</span><span class="p">,</span><span class="s1">&#39;note&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">sum</span><span class="p">([</span><span class="n">item</span> <span class="ow">in</span> <span class="n">required_keys</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">file_obj</span><span class="p">[</span><span class="s1">&#39;wave&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">()])</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">required_keys</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;This is not a valid xps ibw file. It does not satisfy minimum adimissibility criteria.&#39;</span><span class="p">)</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">path_tail</span><span class="p">,</span> <span class="n">path_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="c1"># Group name and attributes</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">path_head</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="c1"># Convert notes of bytes class to string class and split string into a list of elements separated by &#39;\r&#39;. </span>
<span class="n">notes_list</span> <span class="o">=</span> <span class="n">file_obj</span><span class="p">[</span><span class="s1">&#39;wave&#39;</span><span class="p">][</span><span class="s1">&#39;note&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\r</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">exclude_list</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;Excitation Energy&#39;</span><span class="p">]</span>
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">notes_list</span><span class="p">:</span>
<span class="k">if</span> <span class="s1">&#39;=&#39;</span> <span class="ow">in</span> <span class="n">item</span><span class="p">:</span>
<span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">item</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;=&#39;</span><span class="p">))</span>
<span class="c1"># TODO: check if value can be converted into a numeric type. Now all values are string type</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">exclude_list</span><span class="p">:</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">][</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="c1"># TODO: talk to Thorsten to see if there is an easier way to access the below attributes</span>
<span class="n">dimension_labels</span> <span class="o">=</span> <span class="n">file_obj</span><span class="p">[</span><span class="s1">&#39;wave&#39;</span><span class="p">][</span><span class="s1">&#39;dimension_units&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;]&#39;</span><span class="p">)</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">][</span><span class="s1">&#39;dimension_units&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="o">+</span><span class="s1">&#39;]&#39;</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">dimension_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="nb">len</span><span class="p">(</span><span class="n">dimension_labels</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">]]</span>
<span class="c1"># Datasets and their attributes</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;spectrum&#39;</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">file_obj</span><span class="p">[</span><span class="s1">&#39;wave&#39;</span><span class="p">][</span><span class="s1">&#39;wData&#39;</span><span class="p">]</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data_units&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">file_obj</span><span class="p">[</span><span class="s1">&#39;wave&#39;</span><span class="p">][</span><span class="s1">&#39;data_units&#39;</span><span class="p">]</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;shape&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;dtype&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">])</span>
<span class="c1"># TODO: include energy axis dataset</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="k">return</span> <span class="n">file_dict</span></div>
<div class="viewcode-block" id="copy_file_in_group">
<a class="viewcode-back" href="../../modules/src.html#src.g5505_file_reader.copy_file_in_group">[docs]</a>
<span class="k">def</span> <span class="nf">copy_file_in_group</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">dest_file_obj</span> <span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">,</span> <span class="n">dest_group_name</span><span class="p">,</span> <span class="n">work_with_copy</span> <span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
<span class="c1"># Create copy of original file to avoid possible file corruption and work with it.</span>
<span class="k">if</span> <span class="n">work_with_copy</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">make_file_copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">source_file_path</span>
<span class="c1"># Open backup h5 file and copy complete filesystem directory onto a group in h5file</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">src_file</span><span class="p">:</span>
<span class="n">dest_file_obj</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">source</span><span class="o">=</span> <span class="n">src_file</span><span class="p">[</span><span class="s1">&#39;/&#39;</span><span class="p">],</span> <span class="n">dest</span><span class="o">=</span> <span class="n">dest_group_name</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;tmp_files&#39;</span> <span class="ow">in</span> <span class="n">tmp_file_path</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">)</span></div>
<div class="viewcode-block" id="read_txt_files_as_dict">
<a class="viewcode-back" href="../../modules/src.html#src.g5505_file_reader.read_txt_files_as_dict">[docs]</a>
<span class="k">def</span> <span class="nf">read_txt_files_as_dict</span><span class="p">(</span><span class="n">filename</span> <span class="p">:</span> <span class="nb">str</span> <span class="p">,</span> <span class="n">work_with_copy</span> <span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span> <span class="p">):</span>
<span class="c1"># Get the directory of the current module</span>
<span class="n">module_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span>
<span class="c1"># Construct the relative file path</span>
<span class="n">instrument_configs_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">module_dir</span><span class="p">,</span> <span class="s1">&#39;instruments&#39;</span><span class="p">,</span> <span class="s1">&#39;text_data_sources.yaml&#39;</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">instrument_configs_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">config_dict</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
<span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">exc</span><span class="p">)</span>
<span class="c1"># Verify if file can be read by available instrument configurations.</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">any</span><span class="p">(</span><span class="n">key</span> <span class="ow">in</span> <span class="n">filename</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span>
<span class="k">return</span> <span class="p">{}</span>
<span class="c1">#TODO: this may be prone to error if assumed folder structure is non compliant </span>
<span class="n">file_encoding</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;default&#39;</span><span class="p">][</span><span class="s1">&#39;file_encoding&#39;</span><span class="p">]</span> <span class="c1">#&#39;utf-8&#39;</span>
<span class="n">separator</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;default&#39;</span><span class="p">][</span><span class="s1">&#39;separator&#39;</span><span class="p">]</span>
<span class="n">table_header</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;default&#39;</span><span class="p">][</span><span class="s1">&#39;table_header&#39;</span><span class="p">]</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">config_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">,</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
<span class="n">file_encoding</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;file_encoding&#39;</span><span class="p">,</span><span class="n">file_encoding</span><span class="p">)</span>
<span class="n">separator</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;separator&#39;</span><span class="p">,</span><span class="n">separator</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\\</span><span class="s1">t&#39;</span><span class="p">,</span><span class="s1">&#39;</span><span class="se">\t</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">table_header</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;table_header&#39;</span><span class="p">,</span><span class="n">table_header</span><span class="p">)</span>
<span class="n">timestamp_variables</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;timestamp&#39;</span><span class="p">,[])</span>
<span class="n">datetime_format</span> <span class="o">=</span> <span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;datetime_format&#39;</span><span class="p">,[])</span>
<span class="n">description_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="c1">#link_to_description = config_dict[key].get(&#39;link_to_description&#39;,[]).replace(&#39;/&#39;,os.sep)</span>
<span class="n">link_to_description</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">module_dir</span><span class="p">,</span><span class="n">config_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;link_to_description&#39;</span><span class="p">,[])</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">,</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">))</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">link_to_description</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">description_dict</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
<span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">exc</span><span class="p">)</span>
<span class="k">break</span>
<span class="c1">#if &#39;None&#39; in table_header:</span>
<span class="c1"># return {}</span>
<span class="c1"># Read header as a dictionary and detect where data table starts</span>
<span class="n">header_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">data_start</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># Work with copy of the file for safety</span>
<span class="k">if</span> <span class="n">work_with_copy</span><span class="p">:</span>
<span class="n">tmp_filename</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">make_file_copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="o">=</span><span class="n">filename</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">tmp_filename</span> <span class="o">=</span> <span class="n">filename</span>
<span class="c1">#with open(tmp_filename,&#39;rb&#39;,encoding=file_encoding,errors=&#39;ignore&#39;) as f:</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">tmp_filename</span><span class="p">,</span><span class="s1">&#39;rb&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">table_preamble</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">line_number</span><span class="p">,</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">f</span><span class="p">):</span>
<span class="k">if</span> <span class="n">table_header</span> <span class="ow">in</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">file_encoding</span><span class="p">):</span>
<span class="n">list_of_substrings</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">file_encoding</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">separator</span><span class="p">)</span>
<span class="c1"># Count occurrences of each substring</span>
<span class="n">substring_counts</span> <span class="o">=</span> <span class="n">collections</span><span class="o">.</span><span class="n">Counter</span><span class="p">(</span><span class="n">list_of_substrings</span><span class="p">)</span>
<span class="n">data_start</span> <span class="o">=</span> <span class="kc">True</span>
<span class="c1"># Generate column names with appended index only for repeated substrings</span>
<span class="n">column_names</span> <span class="o">=</span> <span class="p">[</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">name</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="si">}</span><span class="s2">&quot;</span> <span class="k">if</span> <span class="n">substring_counts</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="n">name</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">list_of_substrings</span><span class="p">)]</span>
<span class="c1">#column_names = [str(i)+&#39;_&#39;+name.strip() for i, name in enumerate(list_of_substrings)]</span>
<span class="c1">#column_names = []</span>
<span class="c1">#for i, name in enumerate(list_of_substrings):</span>
<span class="c1"># column_names.append(str(i)+&#39;_&#39;+name) </span>
<span class="c1">#print(line_number, len(column_names ),&#39;\n&#39;)</span>
<span class="k">break</span>
<span class="c1"># Subdivide line into words, and join them by single space. </span>
<span class="c1"># I assume this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on.</span>
<span class="n">list_of_substrings</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="n">file_encoding</span><span class="p">)</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
<span class="c1"># TODO: ideally we should use a multiline string but the YAML parser is not recognizing \n as a special character</span>
<span class="c1">#line = &#39; &#39;.join(list_of_substrings+[&#39;\n&#39;])</span>
<span class="c1">#line = &#39; &#39;.join(list_of_substrings) </span>
<span class="n">table_preamble</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">item</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">list_of_substrings</span><span class="p">]))</span><span class="c1"># += new_line </span>
<span class="c1"># Represent string values as fixed length strings in the HDF5 file, which need</span>
<span class="c1"># to be decoded as string when we read them. It provides better control than variable strings,</span>
<span class="c1"># at the expense of flexibility.</span>
<span class="c1"># https://docs.h5py.org/en/stable/strings.html</span>
<span class="k">if</span> <span class="n">table_preamble</span><span class="p">:</span>
<span class="n">header_dict</span><span class="p">[</span><span class="s2">&quot;table_preamble&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_string_to_bytes</span><span class="p">(</span><span class="n">table_preamble</span><span class="p">)</span>
<span class="c1"># TODO: it does not work with separator as none :(. fix for RGA</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">tmp_filename</span><span class="p">,</span>
<span class="n">delimiter</span> <span class="o">=</span> <span class="n">separator</span><span class="p">,</span>
<span class="n">header</span><span class="o">=</span><span class="n">line_number</span><span class="p">,</span>
<span class="c1">#encoding=&#39;latin-1&#39;,</span>
<span class="n">encoding</span> <span class="o">=</span> <span class="n">file_encoding</span><span class="p">,</span>
<span class="n">names</span><span class="o">=</span><span class="n">column_names</span><span class="p">,</span>
<span class="n">skip_blank_lines</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">df_numerical_attrs</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span> <span class="o">=</span><span class="s1">&#39;number&#39;</span><span class="p">)</span>
<span class="n">df_categorical_attrs</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="s1">&#39;number&#39;</span><span class="p">)</span>
<span class="n">numerical_variables</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">df_numerical_attrs</span><span class="o">.</span><span class="n">columns</span><span class="p">]</span>
<span class="c1"># Consolidate into single timestamp column the separate columns &#39;date&#39; &#39;time&#39; specified in text_data_source.yaml</span>
<span class="k">if</span> <span class="n">timestamp_variables</span><span class="p">:</span>
<span class="c1">#df_categorical_attrs[&#39;timestamps&#39;] = [&#39; &#39;.join(df_categorical_attrs.loc[i,timestamp_variables].to_numpy()) for i in df.index]</span>
<span class="c1">#df_categorical_attrs[&#39;timestamps&#39;] = [ df_categorical_attrs.loc[i,&#39;0_Date&#39;]+&#39; &#39;+df_categorical_attrs.loc[i,&#39;1_Time&#39;] for i in df.index]</span>
<span class="c1">#df_categorical_attrs[&#39;timestamps&#39;] = df_categorical_attrs[timestamp_variables].astype(str).agg(&#39; &#39;.join, axis=1)</span>
<span class="n">timestamps_name</span> <span class="o">=</span> <span class="s1">&#39; &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">timestamp_variables</span><span class="p">)</span>
<span class="n">df_categorical_attrs</span><span class="p">[</span> <span class="n">timestamps_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamp_variables</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="s1">&#39; &#39;</span><span class="o">.</span><span class="n">join</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">valid_indices</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="n">datetime_format</span><span class="p">:</span>
<span class="n">df_categorical_attrs</span><span class="p">[</span> <span class="n">timestamps_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df_categorical_attrs</span><span class="p">[</span> <span class="n">timestamps_name</span><span class="p">],</span><span class="nb">format</span><span class="o">=</span><span class="n">datetime_format</span><span class="p">,</span><span class="n">errors</span><span class="o">=</span><span class="s1">&#39;coerce&#39;</span><span class="p">)</span>
<span class="n">valid_indices</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">])</span><span class="o">.</span><span class="n">index</span>
<span class="n">df_categorical_attrs</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">valid_indices</span><span class="p">,:]</span>
<span class="n">df_numerical_attrs</span> <span class="o">=</span> <span class="n">df_numerical_attrs</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">valid_indices</span><span class="p">,:]</span>
<span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">config_dict</span><span class="p">[</span><span class="s1">&#39;default&#39;</span><span class="p">][</span><span class="s1">&#39;desired_format&#39;</span><span class="p">])</span>
<span class="n">startdate</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="n">enddate</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="p">[</span><span class="n">timestamps_name</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span>
<span class="c1">#header_dict.update({&#39;stastrrtdate&#39;:startdate,&#39;enddate&#39;:enddate})</span>
<span class="n">header_dict</span><span class="p">[</span><span class="s1">&#39;startdate&#39;</span><span class="p">]</span><span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">startdate</span><span class="p">)</span>
<span class="n">header_dict</span><span class="p">[</span><span class="s1">&#39;enddate&#39;</span><span class="p">]</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">enddate</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">timestamp_variables</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">df_categorical_attrs</span> <span class="o">=</span> <span class="n">df_categorical_attrs</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span> <span class="o">=</span> <span class="n">timestamp_variables</span><span class="p">)</span>
<span class="c1">#df_categorical_attrs.reindex(drop=True)</span>
<span class="c1">#df_numerical_attrs.reindex(drop=True)</span>
<span class="n">categorical_variables</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">df_categorical_attrs</span><span class="o">.</span><span class="n">columns</span><span class="p">]</span>
<span class="c1">####</span>
<span class="c1">#elif &#39;RGA&#39; in filename:</span>
<span class="c1"># df_categorical_attrs = df_categorical_attrs.rename(columns={&#39;0_Time(s)&#39; : &#39;timestamps&#39;})</span>
<span class="c1">###</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">path_tail</span><span class="p">,</span> <span class="n">path_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">tmp_filename</span><span class="p">)</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">path_head</span>
<span class="c1"># TODO: review this header dictionary, it may not be the best way to represent header data</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">header_dict</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="c1">####</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">((</span><span class="n">df_categorical_attrs</span><span class="p">,</span><span class="n">df_numerical_attrs</span><span class="p">),</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="c1">#if numerical_variables:</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;data_table&#39;</span><span class="c1">#_numerical_variables&#39;</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">dataframe_to_np_structured_array</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="c1">#df_numerical_attrs.to_numpy()</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;shape&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;dtype&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">])</span>
<span class="c1">#dataset[&#39;data_units&#39;] = file_obj[&#39;wave&#39;][&#39;data_units&#39;] </span>
<span class="c1"># </span>
<span class="c1"># Create attribute descriptions based on description_dict</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
<span class="n">column_attr_dict</span> <span class="o">=</span> <span class="n">description_dict</span><span class="p">[</span><span class="s1">&#39;table_header&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">column_name</span><span class="p">,</span>
<span class="p">{</span><span class="s1">&#39;note&#39;</span><span class="p">:</span><span class="s1">&#39;there was no description available. Review instrument files.&#39;</span><span class="p">})</span>
<span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="n">column_name</span><span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">parse_attribute</span><span class="p">(</span><span class="n">column_attr_dict</span><span class="p">)})</span>
<span class="c1">#try:</span>
<span class="c1"># dataset[&#39;attributes&#39;] = description_dict[&#39;table_header&#39;].copy()</span>
<span class="c1"># for key in description_dict[&#39;table_header&#39;].keys():</span>
<span class="c1"># if not key in numerical_variables:</span>
<span class="c1"># dataset[&#39;attributes&#39;].pop(key) # delete key</span>
<span class="c1"># else:</span>
<span class="c1"># dataset[&#39;attributes&#39;][key] = utils.parse_attribute(dataset[&#39;attributes&#39;][key])</span>
<span class="c1"># if timestamps_name in categorical_variables:</span>
<span class="c1"># dataset[&#39;attributes&#39;][timestamps_name] = utils.parse_attribute({&#39;unit&#39;:&#39;YYYY-MM-DD HH:MM:SS.ffffff&#39;})</span>
<span class="c1">#except ValueError as err:</span>
<span class="c1"># print(err)</span>
<span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="c1">#if categorical_variables:</span>
<span class="c1"># dataset = {}</span>
<span class="c1"># dataset[&#39;name&#39;] = &#39;table_categorical_variables&#39;</span>
<span class="c1"># dataset[&#39;data&#39;] = dataframe_to_np_structured_array(df_categorical_attrs) #df_categorical_attrs.loc[:,categorical_variables].to_numpy()</span>
<span class="c1"># dataset[&#39;shape&#39;] = dataset[&#39;data&#39;].shape</span>
<span class="c1"># dataset[&#39;dtype&#39;] = type(dataset[&#39;data&#39;])</span>
<span class="c1"># if timestamps_name in categorical_variables:</span>
<span class="c1"># dataset[&#39;attributes&#39;] = {timestamps_name: utils.parse_attribute({&#39;unit&#39;:&#39;YYYY-MM-DD HH:MM:SS.ffffff&#39;})}</span>
<span class="c1"># file_dict[&#39;datasets&#39;].append(dataset) </span>
<span class="k">except</span><span class="p">:</span>
<span class="k">return</span> <span class="p">{}</span>
<span class="k">return</span> <span class="n">file_dict</span></div>
<div class="viewcode-block" id="main">
<a class="viewcode-back" href="../../modules/src.html#src.g5505_file_reader.main">[docs]</a>
<span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
<span class="n">inputfile_dir</span> <span class="o">=</span> <span class="s1">&#39;</span><span class="se">\\\\</span><span class="s1">fs101</span><span class="se">\\</span><span class="s1">5505</span><span class="se">\\</span><span class="s1">People</span><span class="se">\\</span><span class="s1">Juan</span><span class="se">\\</span><span class="s1">TypicalBeamTime&#39;</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="n">read_xps_ibw_file_as_dict</span><span class="p">(</span><span class="n">inputfile_dir</span><span class="o">+</span><span class="s1">&#39;</span><span class="se">\\</span><span class="s1">SES</span><span class="se">\\</span><span class="s1">0069069_N1s_495eV.ibw&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">file_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="n">key</span><span class="p">,</span><span class="n">file_dict</span><span class="p">[</span><span class="n">key</span><span class="p">])</span></div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">&#39;__main__&#39;</span><span class="p">:</span>
<span class="n">main</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;:)&#39;</span><span class="p">)</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,164 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_data_extraction &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.metadata_review_lib">Data annotation and review</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.g5505_file_reader">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_data_extraction</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_data_extraction</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">src.hdf5_vis</span> <span class="k">as</span> <span class="nn">hdf5_vis</span>
<div class="viewcode-block" id="read_dataset_from_hdf5file">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_data_extraction.read_dataset_from_hdf5file">[docs]</a>
<span class="k">def</span> <span class="nf">read_dataset_from_hdf5file</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">,</span> <span class="n">dataset_path</span><span class="p">):</span>
<span class="c1"># Open the HDF5 file</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">hdf</span><span class="p">:</span>
<span class="c1"># Load the dataset</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">hdf</span><span class="p">[</span><span class="n">dataset_path</span><span class="p">]</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dataset</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">read_direct</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="k">for</span> <span class="n">col_name</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="s1">&#39;number&#39;</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="n">col_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col_name</span><span class="p">]</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span> <span class="c1">#apply(lambda x: x.decode(&#39;utf-8&#39;) if isinstance(x,bytes) else x)</span>
<span class="c1">## Extract metadata (attributes) and convert to a dictionary</span>
<span class="c1">#metadata = hdf5_vis.construct_attributes_dict(hdf[dataset_name].attrs)</span>
<span class="c1">## Create a one-row DataFrame with the metadata</span>
<span class="c1">#metadata_df = pd.DataFrame.from_dict(data, orient=&#39;columns&#39;) </span>
<span class="k">return</span> <span class="n">df</span></div>
<div class="viewcode-block" id="read_metadata_from_hdf5obj">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_data_extraction.read_metadata_from_hdf5obj">[docs]</a>
<span class="k">def</span> <span class="nf">read_metadata_from_hdf5obj</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">,</span> <span class="n">obj_path</span><span class="p">):</span>
<span class="c1"># TODO: Complete this function</span>
<span class="n">metadata_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">empty</span><span class="p">()</span>
<span class="k">return</span> <span class="n">metadata_df</span></div>
<div class="viewcode-block" id="list_datasets_in_hdf5file">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_data_extraction.list_datasets_in_hdf5file">[docs]</a>
<span class="k">def</span> <span class="nf">list_datasets_in_hdf5file</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
<span class="n">list_of_datasets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="c1">#print(f&#39;Adding dataset: {name}&#39;) #tail: {head} head: {tail}&#39;)</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="n">list_of_datasets</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">file</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="k">lambda</span> <span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">:</span> <span class="n">get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">))</span>
<span class="n">dataset_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">:</span><span class="n">list_of_datasets</span><span class="p">})</span>
<span class="n">dataset_df</span><span class="p">[</span><span class="s1">&#39;parent_instrument&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">3</span><span class="p">])</span>
<span class="n">dataset_df</span><span class="p">[</span><span class="s1">&#39;parent_file&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">2</span><span class="p">])</span>
<span class="k">return</span> <span class="n">dataset_df</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,412 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_lib &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-pipelines.data_integration">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-utils.g5505_utils">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_lib</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_lib</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">root_dir</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">utils.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">instruments.readers.filereader_registry</span> <span class="k">as</span> <span class="nn">filereader_registry</span>
<span class="k">def</span> <span class="nf">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="n">group_name</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transfers data from a file_dict to an HDF5 file.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> h5file : h5py.File</span>
<span class="sd"> HDF5 file object where the data will be written.</span>
<span class="sd"> group_name : str</span>
<span class="sd"> Name of the HDF5 group where data will be stored.</span>
<span class="sd"> file_dict : dict</span>
<span class="sd"> Dictionary containing file data to be transferred. Required structure:</span>
<span class="sd"> {</span>
<span class="sd"> &#39;name&#39;: str,</span>
<span class="sd"> &#39;attributes_dict&#39;: dict,</span>
<span class="sd"> &#39;datasets&#39;: [</span>
<span class="sd"> {</span>
<span class="sd"> &#39;name&#39;: str,</span>
<span class="sd"> &#39;data&#39;: array-like,</span>
<span class="sd"> &#39;shape&#39;: tuple,</span>
<span class="sd"> &#39;attributes&#39;: dict (optional)</span>
<span class="sd"> },</span>
<span class="sd"> ...</span>
<span class="sd"> ]</span>
<span class="sd"> }</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">file_dict</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Create group and add its attributes</span>
<span class="n">group</span> <span class="o">=</span> <span class="n">h5file</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">])</span>
<span class="c1"># Add group attributes </span>
<span class="n">group</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">])</span>
<span class="c1"># Add datasets to the just created group</span>
<span class="k">for</span> <span class="n">dataset</span> <span class="ow">in</span> <span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]:</span>
<span class="n">dataset_obj</span> <span class="o">=</span> <span class="n">group</span><span class="o">.</span><span class="n">create_dataset</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">],</span>
<span class="n">data</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">],</span>
<span class="n">shape</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;shape&#39;</span><span class="p">]</span>
<span class="p">)</span>
<span class="c1"># Add dataset&#39;s attributes </span>
<span class="n">attributes</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;attributes&#39;</span><span class="p">,</span> <span class="p">{})</span>
<span class="n">dataset_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">attributes</span><span class="p">)</span>
<span class="n">group</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;last_update_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">inst</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">inst</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">&#39;Failed to transfer data into HDF5: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">inst</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__copy_file_in_group</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">dest_file_obj</span> <span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">,</span> <span class="n">dest_group_name</span><span class="p">,</span> <span class="n">work_with_copy</span> <span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
<span class="c1"># Create copy of original file to avoid possible file corruption and work with it.</span>
<span class="k">if</span> <span class="n">work_with_copy</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">make_file_copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">source_file_path</span>
<span class="c1"># Open backup h5 file and copy complete filesystem directory onto a group in h5file</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">src_file</span><span class="p">:</span>
<span class="n">dest_file_obj</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">source</span><span class="o">=</span> <span class="n">src_file</span><span class="p">[</span><span class="s1">&#39;/&#39;</span><span class="p">],</span> <span class="n">dest</span><span class="o">=</span> <span class="n">dest_group_name</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;tmp_files&#39;</span> <span class="ow">in</span> <span class="n">tmp_file_path</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">)</span>
<div class="viewcode-block" id="create_hdf5_file_from_filesystem_path">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_lib.create_hdf5_file_from_filesystem_path">[docs]</a>
<span class="k">def</span> <span class="nf">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">path_to_input_directory</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">path_to_filenames_dict</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">select_dir_keywords</span> <span class="p">:</span> <span class="nb">list</span> <span class="o">=</span> <span class="p">[],</span>
<span class="n">root_metadata_dict</span> <span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="p">{},</span> <span class="n">mode</span> <span class="o">=</span> <span class="s1">&#39;w&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates an .h5 file, named after the input directory plus the &#39;.h5&#39; suffix, that preserves the directory tree (or folder structure)</span>
<span class="sd"> of a given filesystem path.</span>
<span class="sd"> The data integration capabilities are limited by our file reader, which can only access data from a list of</span>
<span class="sd"> admissible file formats. These, however, can be extended. Directories are groups in the resulting HDF5 file.</span>
<span class="sd"> Files are formatted as composite objects consisting of a group, file, and attributes.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> output_filename : str</span>
<span class="sd"> Name of the output HDF5 file.</span>
<span class="sd"> path_to_input_directory : str</span>
<span class="sd"> Path to root directory, specified with forward slashes, e.g., path/to/root.</span>
<span class="sd"> path_to_filenames_dict : dict, optional</span>
<span class="sd"> A pre-processed dictionary where keys are directory paths on the input directory&#39;s tree and values are lists of files.</span>
<span class="sd"> If provided, the input directory is not scanned; &#39;path_to_input_directory&#39; is still used to name the output file and to derive group names.</span>
<span class="sd"> select_dir_keywords : list</span>
<span class="sd"> List of string elements to consider or select only directory paths that contain</span>
<span class="sd"> a word in &#39;select_dir_keywords&#39;. When empty, all directory paths are considered</span>
<span class="sd"> to be included in the HDF5 file group hierarchy.</span>
<span class="sd"> root_metadata_dict : dict</span>
<span class="sd"> Metadata to include at the root level of the HDF5 file.</span>
<span class="sd"> mode : str</span>
<span class="sd"> &#39;w&#39; create File, truncate if it exists, or &#39;r+&#39; read/write, File must exist. By default, mode = &quot;w&quot;.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> output_filename : str</span>
<span class="sd"> Path to the created HDF5 file.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">mode</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;w&#39;</span><span class="p">,</span><span class="s1">&#39;r+&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Parameter mode must take values in [&quot;w&quot;,&quot;r+&quot;]&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;/&#39;</span> <span class="ow">in</span> <span class="n">path_to_input_directory</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;path_to_input_directory needs to be specified using forward slashes &quot;/&quot;.&#39;</span> <span class="p">)</span>
<span class="c1">#path_to_output_directory = os.path.join(path_to_input_directory,&#39;..&#39;)</span>
<span class="n">path_to_input_directory</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">path_to_input_directory</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">keyword</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">select_dir_keywords</span><span class="p">):</span>
<span class="n">select_dir_keywords</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">keyword</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">,</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">path_to_filenames_dict</span><span class="p">:</span>
<span class="c1"># On dry_run=True, returns path to files dictionary of the output directory without making an actual copy of the input directory. </span>
<span class="c1"># Therefore, there won&#39;t be a copying conflict when the input and output directories are set to the same path</span>
<span class="n">path_to_filenames_dict</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">copy_directory_with_contraints</span><span class="p">(</span><span class="n">input_dir_path</span><span class="o">=</span><span class="n">path_to_input_directory</span><span class="p">,</span>
<span class="n">output_dir_path</span><span class="o">=</span><span class="n">path_to_input_directory</span><span class="p">,</span>
<span class="n">dry_run</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Set input_directory as copied input directory</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">path_to_input_directory</span>
<span class="n">path_to_output_file</span> <span class="o">=</span> <span class="n">path_to_input_directory</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;.h5&#39;</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">path_to_output_file</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span> <span class="n">track_order</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="k">as</span> <span class="n">h5file</span><span class="p">:</span>
<span class="n">number_of_dirs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">path_to_filenames_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">dir_number</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">for</span> <span class="n">dirpath</span><span class="p">,</span> <span class="n">filtered_filenames_list</span> <span class="ow">in</span> <span class="n">path_to_filenames_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="n">start_message</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;Starting to transfer files in directory: </span><span class="si">{</span><span class="n">dirpath</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="n">end_message</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">Completed transferring files in directory: </span><span class="si">{</span><span class="n">dirpath</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="c1"># Print and log the start message</span>
<span class="nb">print</span><span class="p">(</span><span class="n">start_message</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">start_message</span><span class="p">)</span>
<span class="c1"># Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict. </span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">filtered_filenames_list</span><span class="p">:</span>
<span class="k">continue</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="n">dirpath</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="n">group_name</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">root_dir</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="c1"># Flatten group name to one level</span>
<span class="k">if</span> <span class="n">select_dir_keywords</span><span class="p">:</span>
<span class="n">offset</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">i</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">))</span> <span class="k">if</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">dirpath</span> <span class="k">else</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">select_dir_keywords</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">offset</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">tmp_list</span> <span class="o">=</span> <span class="n">group_name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tmp_list</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">offset</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="s1">&#39;/&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">tmp_list</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">offset</span><span class="o">+</span><span class="mi">1</span><span class="p">)])</span>
<span class="c1"># Group hierarchy is implicitly defined by the forward slashes</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">group_name</span> <span class="ow">in</span> <span class="n">h5file</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">group_name</span><span class="p">)</span>
<span class="n">h5file</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;creation_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="c1">#h5file[group_name].attrs.create(name=&#39;filtered_file_list&#39;,data=convert_string_to_bytes(filtered_filename_list))</span>
<span class="c1">#h5file[group_name].attrs.create(name=&#39;file_list&#39;,data=convert_string_to_bytes(filenames_list))</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">group_name</span><span class="p">,</span><span class="s1">&#39; was already created.&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">filenumber</span><span class="p">,</span> <span class="n">filename</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">filtered_filenames_list</span><span class="p">):</span>
<span class="c1">#file_ext = os.path.splitext(filename)[1]</span>
<span class="c1">#try: </span>
<span class="c1"># hdf5 path to filename group </span>
<span class="n">dest_group_name</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">group_name</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;h5&#39;</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
<span class="c1">#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))</span>
<span class="c1">#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="n">filereader_registry</span><span class="o">.</span><span class="n">select_file_reader</span><span class="p">(</span><span class="n">dest_group_name</span><span class="p">)(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span><span class="n">filename</span><span class="p">))</span>
<span class="n">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="n">group_name</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">source_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span><span class="n">filename</span><span class="p">)</span>
<span class="n">dest_file_obj</span> <span class="o">=</span> <span class="n">h5file</span>
<span class="c1">#group_name +&#39;/&#39;+filename</span>
<span class="c1">#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)</span>
<span class="c1">#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)</span>
<span class="n">__copy_file_in_group</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">dest_file_obj</span><span class="p">,</span> <span class="n">dest_group_name</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
<span class="c1"># Update the progress bar and log the end message</span>
<span class="n">utils</span><span class="o">.</span><span class="n">progressBar</span><span class="p">(</span><span class="n">dir_number</span><span class="p">,</span> <span class="n">number_of_dirs</span><span class="p">,</span> <span class="n">end_message</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">end_message</span><span class="p">)</span>
<span class="n">dir_number</span> <span class="o">=</span> <span class="n">dir_number</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">:</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="c1">#if key in h5file.attrs:</span>
<span class="c1"># del h5file.attrs[key]</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="c1">#annotate_root_dir(output_filename,root_metadata_dict) </span>
<span class="c1">#output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)</span>
<span class="k">return</span> <span class="n">path_to_output_file</span> <span class="c1">#, output_yml_filename_path</span></div>
<div class="viewcode-block" id="save_processed_dataframe_to_hdf5">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_lib.save_processed_dataframe_to_hdf5">[docs]</a>
<span class="k">def</span> <span class="nf">save_processed_dataframe_to_hdf5</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">annotator</span><span class="p">,</span> <span class="n">output_filename</span><span class="p">):</span> <span class="c1"># src_hdf5_path, script_date, script_name):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Save processed dataframe columns with annotations to an HDF5 file.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> df (pd.DataFrame): DataFrame containing processed time series.</span>
<span class="sd"> annotator (): Annotator object with get_metadata method.</span>
<span class="sd"> output_filename (str): Path to the source HDF5 file.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Convert datetime columns to string</span>
<span class="n">datetime_cols</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;datetime64&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">columns</span>
<span class="k">if</span> <span class="nb">list</span><span class="p">(</span><span class="n">datetime_cols</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="n">datetime_cols</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">datetime_cols</span><span class="p">]</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span>
<span class="c1"># Convert dataframe to structured array</span>
<span class="n">icad_data_table</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_dataframe_to_np_structured_array</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="c1"># Get metadata</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="n">annotator</span><span class="o">.</span><span class="n">get_metadata</span><span class="p">()</span>
<span class="c1"># Prepare project level attributes to be added at the root level</span>
<span class="n">project_level_attributes</span> <span class="o">=</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;project&#39;</span><span class="p">]</span>
<span class="c1"># Prepare high-level attributes</span>
<span class="n">high_level_attributes</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;parent_files&#39;</span><span class="p">:</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;parent_files&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;sample&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;environment&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;instruments&#39;</span><span class="p">]</span>
<span class="p">}</span>
<span class="c1"># Prepare data level attributes</span>
<span class="n">data_level_attributes</span> <span class="o">=</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">data_level_attributes</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span><span class="nb">dict</span><span class="p">):</span>
<span class="n">data_level_attributes</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="c1"># Prepare file dictionary</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="n">project_level_attributes</span><span class="p">[</span><span class="s1">&#39;processing_file&#39;</span><span class="p">],</span>
<span class="s1">&#39;attributes_dict&#39;</span><span class="p">:</span> <span class="n">high_level_attributes</span><span class="p">,</span>
<span class="s1">&#39;datasets&#39;</span><span class="p">:</span> <span class="p">[{</span>
<span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s2">&quot;data_table&quot;</span><span class="p">,</span>
<span class="s1">&#39;data&#39;</span><span class="p">:</span> <span class="n">icad_data_table</span><span class="p">,</span>
<span class="s1">&#39;shape&#39;</span><span class="p">:</span> <span class="n">icad_data_table</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span>
<span class="s1">&#39;attributes&#39;</span><span class="p">:</span> <span class="n">data_level_attributes</span>
<span class="p">}]</span>
<span class="p">}</span>
<span class="c1"># Check if the file exists</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">output_filename</span><span class="p">):</span>
<span class="n">mode</span> <span class="o">=</span> <span class="s2">&quot;a&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;File </span><span class="si">{</span><span class="n">output_filename</span><span class="si">}</span><span class="s2"> exists. Opening in append mode.&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">mode</span> <span class="o">=</span> <span class="s2">&quot;w&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;File </span><span class="si">{</span><span class="n">output_filename</span><span class="si">}</span><span class="s2"> does not exist. Creating a new file.&quot;</span><span class="p">)</span>
<span class="c1"># Write to HDF5</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">output_filename</span><span class="p">,</span> <span class="n">mode</span><span class="p">)</span> <span class="k">as</span> <span class="n">h5file</span><span class="p">:</span>
<span class="c1"># Add project level attributes at the root/top level</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">project_level_attributes</span><span class="p">)</span>
<span class="n">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">)</span></div>
<span class="c1">#if __name__ == &#39;__main__&#39;:</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,832 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_ops &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_ops</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_ops</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">NameError</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error: __file__ is not available. Ensure the script is being run from a file.&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;[Notice] Path to DIMA package may not be resolved properly.&quot;</span><span class="p">)</span>
<span class="n">thisFilePath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span> <span class="c1"># Use current directory or specify a default</span>
<span class="n">dimaPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">thisFilePath</span><span class="p">,</span> <span class="s2">&quot;..&quot;</span><span class="p">,</span><span class="s1">&#39;..&#39;</span><span class="p">))</span> <span class="c1"># Move up to project root</span>
<span class="k">if</span> <span class="n">dimaPath</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="p">:</span> <span class="c1"># Avoid duplicate entries</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">dimaPath</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">utils.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">src.hdf5_writer</span> <span class="k">as</span> <span class="nn">hdf5_lib</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">datetime</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">copy</span>
<div class="viewcode-block" id="HDF5DataOpsManager">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager">[docs]</a>
<span class="k">class</span> <span class="nc">HDF5DataOpsManager</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A class to handle fundamental mid-level HDF5 file operations that power data updates, metadata revision, and data analysis</span>
<span class="sd"> with hdf5 files encoding multi-instrument experimental campaign data.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> file_path : str</span>
<span class="sd"> path/to/hdf5file.</span>
<span class="sd"> mode : str</span>
<span class="sd"> &#39;r&#39; or &#39;r+&#39; read or read/write mode only when file exists</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">file_path</span><span class="p">,</span> <span class="n">mode</span> <span class="o">=</span> <span class="s1">&#39;r+&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># Class attributes</span>
<span class="k">if</span> <span class="n">mode</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;r&#39;</span><span class="p">,</span><span class="s1">&#39;r+&#39;</span><span class="p">]:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">mode</span> <span class="o">=</span> <span class="n">mode</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_path</span> <span class="o">=</span> <span class="n">file_path</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1">#self._open_file()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Define private methods </span>
<span class="c1"># Define public methods</span>
<div class="viewcode-block" id="HDF5DataOpsManager.load_file_obj">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.load_file_obj">[docs]</a>
<span class="k">def</span> <span class="nf">load_file_obj</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mode</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.unload_file_obj">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj">[docs]</a>
<span class="k">def</span> <span class="nf">unload_file_obj</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span> <span class="c1"># Ensure all data is written to disk</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="o">=</span> <span class="kc">None</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.extract_and_load_dataset_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">extract_and_load_dataset_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">__get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
<span class="n">list_of_datasets</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="c1">#print(f&#39;Adding dataset: {name}&#39;) #tail: {head} head: {tail}&#39;)</span>
<span class="n">list_of_datasets</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to extract datasets.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">list_of_datasets</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="k">lambda</span> <span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">:</span> <span class="n">__get_datasets</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">list_of_datasets</span><span class="p">))</span>
<span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">:</span> <span class="n">list_of_datasets</span><span class="p">})</span>
<span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;parent_instrument&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">3</span><span class="p">])</span>
<span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;parent_file&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span><span class="p">[</span><span class="s1">&#39;dataset_name&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)[</span><span class="o">-</span><span class="mi">2</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">dataset_metadata_df</span> <span class="o">=</span> <span class="n">dataset_metadata_df</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. File object will be unloaded.&quot;</span><span class="p">)</span> </div>
<div class="viewcode-block" id="HDF5DataOpsManager.extract_dataset_as_dataframe">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">extract_dataset_as_dataframe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">dataset_name</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
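<span class="sd"> Returns a copy of the dataset content as a pandas DataFrame when possible, otherwise as a NumPy array. Returns None (and unloads the file object) if an unexpected error occurs during conversion.</span>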
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to extract datasets.&quot;</span><span class="p">)</span>
<span class="n">dataset_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="c1"># Read dataset content from dataset obj</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">dataset_obj</span><span class="p">[</span><span class="o">...</span><span class="p">]</span>
<span class="c1"># The above statement can be understood as follows: </span>
<span class="c1"># data = np.empty(shape=dataset_obj.shape, </span>
<span class="c1"># dtype=dataset_obj.dtype)</span>
<span class="c1"># dataset_obj.read_direct(data)</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to convert dataset &#39;</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s2">&#39; to DataFrame: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. Instead, dataset will be returned as Numpy array.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">data</span> <span class="c1"># &#39;data&#39; is a NumPy array here</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. Returning None and unloading file object&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span></div>
<span class="c1"># Define metadata revision methods: append(), update(), delete(), and rename().</span>
<div class="viewcode-block" id="HDF5DataOpsManager.append_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">append_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Appends metadata attributes to the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> This method ensures that the provided metadata attributes do not overwrite any existing ones. If an attribute already exists, </span>
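<span class="sd"> a ValueError is raised internally; it is caught by the method, reported via print, and the file object is unloaded without adding any attribute. The function supports storing scalar values (int, float, str) and compound values such as dictionaries </span>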
<span class="sd"> that are converted into NumPy structured arrays before being added to the metadata.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> A dictionary where the keys represent new attribute names (strings), and the values can be:</span>
<span class="sd"> - Scalars: int, float, or str.</span>
<span class="sd"> - Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays. </span>
<span class="sd"> Example of a compound value:</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> ----------</span>
<span class="sd"> annotation_dict = {</span>
<span class="sd"> &quot;relative_humidity&quot;: {</span>
<span class="sd"> &quot;value&quot;: 65,</span>
<span class="sd"> &quot;units&quot;: &quot;percentage&quot;,</span>
<span class="sd"> &quot;range&quot;: &quot;[0,100]&quot;,</span>
<span class="sd"> &quot;definition&quot;: &quot;amount of water vapor present ...&quot;</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="c1"># Create a copy of annotation_dict to avoid modifying the original</span>
<span class="n">annotation_dict_copy</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">deepcopy</span><span class="p">(</span><span class="n">annotation_dict</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="c1"># Check if any attribute already exists</span>
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">annotation_dict_copy</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Make sure the provided (key, value) pairs are not existing metadata elements or attributes. To modify or delete existing attributes use .modify_annotation() or .delete_annotation()&quot;</span><span class="p">)</span>
<span class="c1"># Process the dictionary values and convert them to structured arrays if needed</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict_copy</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="c1"># Convert dictionaries to NumPy structured arrays for complex attributes</span>
<span class="n">annotation_dict_copy</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="c1"># Update the object&#39;s attributes with the new metadata</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">annotation_dict_copy</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.update_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">update_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Updates the value of existing metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> The function disregards non-existing attributes and suggests to use the append_metadata() method to include those in the metadata.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name : str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> A dictionary where the keys represent existing attribute names (strings), and the values can be:</span>
<span class="sd"> - Scalars: int, float, or str.</span>
<span class="sd"> - Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays. </span>
<span class="sd"> Example of a compound value:</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> ----------</span>
<span class="sd"> annotation_dict = {</span>
<span class="sd"> &quot;relative_humidity&quot;: {</span>
<span class="sd"> &quot;value&quot;: 65,</span>
<span class="sd"> &quot;units&quot;: &quot;percentage&quot;,</span>
<span class="sd"> &quot;range&quot;: &quot;[0,100]&quot;,</span>
<span class="sd"> &quot;definition&quot;: &quot;amount of water vapor present ...&quot;</span>
<span class="sd"> }</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="n">update_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">update_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">update_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Optionally, log or warn about non-existing keys being ignored.</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Warning: Key &#39;</span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s2">&#39; does not exist and will be ignored.&quot;</span><span class="p">)</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">update_dict</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.delete_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.delete_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">delete_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">annotation_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Deletes metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> </span>
<span class="sd"> annotation_dict: dict</span>
<span class="sd"> Dictionary where keys represent attribute names, and values should be dictionaries containing </span>
<span class="sd"> {&quot;delete&quot;: True} to mark them for deletion.</span>
<span class="sd"> Example:</span>
<span class="sd"> --------</span>
<span class="sd"> annotation_dict = {&quot;attr_to_be_deleted&quot;: {&quot;delete&quot;: True}}</span>
<span class="sd"> Behavior:</span>
<span class="sd"> ---------</span>
<span class="sd"> - Deletes the specified attributes from the object&#39;s metadata if marked for deletion.</span>
<span class="sd"> - Issues a warning if the attribute is not found or not marked for deletion.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="k">for</span> <span class="n">attr_key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">annotation_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">attr_key</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span> <span class="ow">and</span> <span class="n">value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;delete&#39;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">attr_key</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Warning: Value for key &#39;</span><span class="si">{</span><span class="n">attr_key</span><span class="si">}</span><span class="s2">&#39; is not marked for deletion or is invalid.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Warning: Key &#39;</span><span class="si">{</span><span class="n">attr_key</span><span class="si">}</span><span class="s2">&#39; does not exist in metadata.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.rename_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.rename_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">rename_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_name</span><span class="p">,</span> <span class="n">renaming_map</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; </span>
<span class="sd"> Renames metadata attributes of the specified object (obj_name) based on the provided renaming_map.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> obj_name: str</span>
<span class="sd"> Path to the target object (dataset or group) within the HDF5 file.</span>
<span class="sd"> renaming_map: dict</span>
<span class="sd"> A dictionary where keys are current attribute names (strings), and values are the new attribute names (strings or byte strings) to rename to.</span>
<span class="sd"> </span>
<span class="sd"> Example:</span>
<span class="sd"> --------</span>
<span class="sd"> renaming_map = {</span>
<span class="sd"> &quot;old_attr_name&quot;: &quot;new_attr_name&quot;,</span>
<span class="sd"> &quot;old_attr_2&quot;: &quot;new_attr_2&quot;</span>
<span class="sd"> }</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">&quot;File object is not loaded. Please load the HDF5 file using the &#39;load_file_obj&#39; method before attempting to modify it.&quot;</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_name</span><span class="p">]</span>
<span class="c1"># Iterate over the renaming_map to process renaming</span>
<span class="k">for</span> <span class="n">old_attr</span><span class="p">,</span> <span class="n">new_attr</span> <span class="ow">in</span> <span class="n">renaming_map</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">old_attr</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">:</span>
<span class="c1"># Get the old attribute&#39;s value</span>
<span class="n">attr_value</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">old_attr</span><span class="p">]</span>
<span class="c1"># Create a new attribute with the new name</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">new_attr</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">attr_value</span><span class="p">)</span>
<span class="c1"># Delete the old attribute</span>
<span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">old_attr</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Skip if the old attribute doesn&#39;t exist</span>
<span class="n">msg</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;Skipping: Attribute &#39;</span><span class="si">{</span><span class="n">old_attr</span><span class="si">}</span><span class="s2">&#39; does not exist.&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span> <span class="c1"># Optionally, replace with warnings.warn(msg)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;An unexpected error occurred: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">. The file object has been properly closed. &quot;</span>
<span class="s2">&quot;Please ensure that &#39;obj_name&#39; exists in the file, and that the keys in &#39;renaming_map&#39; are valid attributes of the object.&quot;</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.get_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.get_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">get_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">obj_path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; Get file attributes from object at path = obj_path. For example,</span>
<span class="sd"> obj_path = &#39;/&#39; will get root level attributes or metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Access the attributes for the object at the given path</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">obj_path</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span>
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
<span class="c1"># Handle the case where the path doesn&#39;t exist</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Invalid object path: </span><span class="si">{</span><span class="n">obj_path</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">return</span> <span class="n">metadata_dict</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.reformat_datetime_column">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column">[docs]</a>
<span class="k">def</span> <span class="nf">reformat_datetime_column</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">src_format</span><span class="p">,</span> <span class="n">desired_format</span><span class="o">=</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H:%M:%S.</span><span class="si">%f</span><span class="s1">&#39;</span><span class="p">):</span>
<span class="c1"># Access the dataset</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
<span class="c1"># Read the column data into a pandas Series and decode bytes to strings</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="n">column_name</span><span class="p">][:])</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> <span class="p">)</span>
<span class="c1"># Convert to datetime using the source format</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">dt_column_data</span><span class="p">,</span> <span class="nb">format</span><span class="o">=</span><span class="n">src_format</span><span class="p">,</span> <span class="n">errors</span> <span class="o">=</span> <span class="s1">&#39;coerce&#39;</span><span class="p">)</span>
<span class="c1"># Reformat datetime objects to the desired format as strings</span>
<span class="n">dt_column_data</span> <span class="o">=</span> <span class="n">dt_column_data</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">desired_format</span><span class="p">)</span>
<span class="c1"># Encode the strings back to bytes</span>
<span class="c1">#encoded_data = dt_column_data.apply(lambda x: x.encode() if not pd.isnull(x) else &#39;N/A&#39;).to_numpy()</span>
<span class="c1"># Update the dataset in place</span>
<span class="c1">#dataset[column_name][:] = encoded_data</span>
<span class="c1"># Convert byte strings to datetime objects</span>
<span class="c1">#timestamps = [datetime.datetime.strptime(a.decode(), src_format).strftime(desired_format) for a in dt_column_data] </span>
<span class="c1">#datetime.strptime(&#39;31/01/22 23:59:59.999999&#39;,</span>
<span class="c1"># &#39;%d/%m/%y %H:%M:%S.%f&#39;)</span>
<span class="c1">#pd.to_datetime(</span>
<span class="c1"># np.array([a.decode() for a in dt_column_data]),</span>
<span class="c1"># format=src_format,</span>
<span class="c1"># errors=&#39;coerce&#39;</span>
<span class="c1">#)</span>
<span class="c1"># Standardize the datetime format</span>
<span class="c1">#standardized_time = datetime.strftime(desired_format)</span>
<span class="c1"># Convert to byte strings to store back in the HDF5 dataset</span>
<span class="c1">#standardized_time_bytes = np.array([s.encode() for s in timestamps])</span>
<span class="c1"># Update the column in the dataset (in-place update)</span>
<span class="c1"># TODO: make this a more secure operation</span>
<span class="c1">#dataset[column_name][:] = standardized_time_bytes</span>
<span class="c1">#return np.array(timestamps)</span>
<span class="k">return</span> <span class="n">dt_column_data</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span></div>
<span class="c1"># Define data append operations: append_dataset(), and update_file()</span>
<div class="viewcode-block" id="HDF5DataOpsManager.append_dataset">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_dataset">[docs]</a>
<span class="k">def</span> <span class="nf">append_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span><span class="n">dataset_dict</span><span class="p">,</span> <span class="n">group_name</span><span class="p">):</span>
<span class="c1"># Parse value into HDF5 admissible type</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="n">key</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">][</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">group_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">group_name</span><span class="p">,</span> <span class="n">track_order</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;creation_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">create_dataset</span><span class="p">(</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">],</span> <span class="n">data</span><span class="o">=</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">][</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]]</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">dataset_dict</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;last_update_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="HDF5DataOpsManager.update_file">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_file">[docs]</a>
<span class="k">def</span> <span class="nf">update_file</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_to_append_dir</span><span class="p">):</span>
<span class="c1"># Split the reference file path and the append directory path into directories and filenames</span>
<span class="n">ref_tail</span><span class="p">,</span> <span class="n">ref_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="p">)</span>
<span class="n">ref_head_filename</span><span class="p">,</span> <span class="n">head_ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">ref_head</span><span class="p">)</span>
<span class="n">tail</span><span class="p">,</span> <span class="n">head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">path_to_append_dir</span><span class="p">)</span>
<span class="c1"># Ensure the append directory is in the same directory as the reference file and has the same name (without extension)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">ref_tail</span> <span class="o">==</span> <span class="n">tail</span> <span class="ow">and</span> <span class="n">ref_head_filename</span> <span class="o">==</span> <span class="n">head</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;The append directory must be in the same directory as the reference HDF5 file and have the same name without the extension.&quot;</span><span class="p">)</span>
<span class="c1"># Close the file if it&#39;s already open</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">file_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">unload_file_obj</span><span class="p">()</span>
<span class="c1"># Attempt to open the file in &#39;r+&#39; mode for appending</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">hdf5_lib</span><span class="o">.</span><span class="n">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">path_to_append_dir</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;r+&#39;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Reference HDF5 file &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">file_path</span><span class="si">}</span><span class="s2">&#39; not found.&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">OSError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Error opening HDF5 file: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span></div>
</div>
<div class="viewcode-block" id="get_parent_child_relationships">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.get_parent_child_relationships">[docs]</a>
<span class="k">def</span> <span class="nf">get_parent_child_relationships</span><span class="p">(</span><span class="n">file</span><span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">):</span>
<span class="n">nodes</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;/&#39;</span><span class="p">]</span>
<span class="n">parent</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;&#39;</span><span class="p">]</span>
<span class="c1">#values = [file.attrs[&#39;count&#39;]]</span>
<span class="c1"># TODO: maybe we should make this more general and not dependent on file_list attribute? </span>
<span class="c1">#if &#39;file_list&#39; in file.attrs.keys():</span>
<span class="c1"># values = [len(file.attrs[&#39;file_list&#39;])]</span>
<span class="c1">#else:</span>
<span class="c1"># values = [1]</span>
<span class="n">values</span> <span class="o">=</span> <span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">keys</span><span class="p">())]</span>
<span class="k">def</span> <span class="nf">node_visitor</span><span class="p">(</span><span class="n">name</span><span class="p">,</span><span class="n">obj</span><span class="p">):</span>
<span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">&lt;=</span><span class="mi">2</span><span class="p">:</span>
<span class="n">nodes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">parent</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">parent</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="c1">#nodes.append(os.path.split(obj.name)[1])</span>
<span class="c1">#parent.append(os.path.split(obj.parent.name)[1])</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span><span class="c1"># or not &#39;file_list&#39; in obj.attrs.keys():</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
<span class="k">except</span><span class="p">:</span>
<span class="n">values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">file</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="n">node_visitor</span><span class="p">)</span>
<span class="k">return</span> <span class="n">nodes</span><span class="p">,</span> <span class="n">parent</span><span class="p">,</span> <span class="n">values</span> </div>
<span class="k">def</span> <span class="nf">__print_metadata__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Extracts metadata from HDF5 groups and datasets and organizes them into a dictionary with compact representation.</span>
<span class="sd"> </span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> name (str): Name of the HDF5 object being inspected.</span>
<span class="sd"> obj (h5py.Group or h5py.Dataset): The HDF5 object (Group or Dataset).</span>
<span class="sd"> folder_depth (int): Maximum depth of folders to explore.</span>
<span class="sd"> yaml_dict (dict): Dictionary to populate with metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Process only objects within the specified folder depth</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="n">folder_depth</span><span class="p">:</span> <span class="c1"># and &quot;.h5&quot; not in obj.name:</span>
<span class="n">name_to_list</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="n">name_head</span> <span class="o">=</span> <span class="n">name_to_list</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">name_to_list</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">==</span><span class="s1">&#39;&#39;</span> <span class="k">else</span> <span class="n">obj</span><span class="o">.</span><span class="n">name</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">h5py</span><span class="o">.</span><span class="n">Group</span><span class="p">):</span> <span class="c1"># Handle groups</span>
<span class="c1"># Convert attributes to a YAML/JSON serializable format</span>
<span class="n">attr_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">to_serializable_dtype</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="c1"># Initialize the group dictionary</span>
<span class="n">group_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">name_head</span><span class="p">,</span> <span class="s2">&quot;attributes&quot;</span><span class="p">:</span> <span class="n">attr_dict</span><span class="p">}</span>
<span class="c1"># Handle group members compactly</span>
<span class="c1">#subgroups = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Group)]</span>
<span class="c1">#datasets = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Dataset)]</span>
<span class="c1"># Summarize groups and datasets</span>
<span class="c1">#group_dict[&quot;content_summary&quot;] = {</span>
<span class="c1"># &quot;group_count&quot;: len(subgroups),</span>
<span class="c1"># &quot;group_preview&quot;: subgroups[:3] + ([&quot;...&quot;] if len(subgroups) &gt; 3 else []),</span>
<span class="c1"># &quot;dataset_count&quot;: len(datasets),</span>
<span class="c1"># &quot;dataset_preview&quot;: datasets[:3] + ([&quot;...&quot;] if len(datasets) &gt; 3 else [])</span>
<span class="c1">#}</span>
<span class="n">yaml_dict</span><span class="p">[</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">group_dict</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="n">h5py</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span> <span class="c1"># Handle datasets</span>
<span class="c1"># Convert attributes to a YAML/JSON serializable format</span>
<span class="n">attr_dict</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">to_serializable_dtype</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
<span class="n">dataset_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">name_head</span><span class="p">,</span> <span class="s2">&quot;attributes&quot;</span><span class="p">:</span> <span class="n">attr_dict</span><span class="p">}</span>
<span class="n">yaml_dict</span><span class="p">[</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataset_dict</span>
<div class="viewcode-block" id="serialize_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.serialize_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">serialize_metadata</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="n">output_format</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;yaml&#39;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Serialize metadata from an HDF5 file into YAML or JSON format.</span>
<span class="sd"> </span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> input_filename_path : str</span>
<span class="sd"> The path to the input HDF5 file.</span>
<span class="sd"> folder_depth : int, optional</span>
<span class="sd"> The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).</span>
<span class="sd"> output_format : str, optional</span>
<span class="sd"> The format to serialize the output, either &#39;yaml&#39; or &#39;json&#39; (default is &#39;yaml&#39;).</span>
<span class="sd"> </span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> str</span>
<span class="sd"> The output file path where the serialized metadata is stored (either .yaml or .json).</span>
<span class="sd"> </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Choose the appropriate output format (YAML or JSON)</span>
<span class="k">if</span> <span class="n">output_format</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;yaml&#39;</span><span class="p">,</span> <span class="s1">&#39;json&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Unsupported format. Please choose either &#39;yaml&#39; or &#39;json&#39;.&quot;</span><span class="p">)</span>
<span class="c1"># Initialize dictionary to store YAML/JSON data</span>
<span class="n">yaml_dict</span> <span class="o">=</span> <span class="p">{}</span>
<span class="c1"># Split input file path to get the output file&#39;s base name</span>
<span class="n">output_filename_tail</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">)</span>
<span class="c1"># Open the HDF5 file and extract metadata</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">input_filename_path</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="c1"># Convert attribute dict to a YAML/JSON serializable dict</span>
<span class="c1">#attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()} </span>
<span class="c1">#yaml_dict[f.name] = {</span>
<span class="c1"># &quot;name&quot;: f.name,</span>
<span class="c1"># &quot;attributes&quot;: attrs_dict,</span>
<span class="c1"># &quot;datasets&quot;: {}</span>
<span class="c1">#}</span>
<span class="n">__print_metadata__</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">f</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">)</span>
<span class="c1"># Traverse HDF5 file hierarchy and add datasets</span>
<span class="n">f</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="k">lambda</span> <span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">:</span> <span class="n">__print_metadata__</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">,</span> <span class="n">folder_depth</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">))</span>
<span class="c1"># Serialize and write the data</span>
<span class="n">output_file_path</span> <span class="o">=</span> <span class="n">output_filename_tail</span> <span class="o">+</span> <span class="s1">&#39;.&#39;</span> <span class="o">+</span> <span class="n">output_format</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_file_path</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">output_file</span><span class="p">:</span>
<span class="k">if</span> <span class="n">output_format</span> <span class="o">==</span> <span class="s1">&#39;json&#39;</span><span class="p">:</span>
<span class="n">json_output</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">yaml_dict</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">json_output</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">output_format</span> <span class="o">==</span> <span class="s1">&#39;yaml&#39;</span><span class="p">:</span>
<span class="n">yaml_output</span> <span class="o">=</span> <span class="n">yaml</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">yaml_dict</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">output_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">yaml_output</span><span class="p">)</span>
<span class="k">return</span> <span class="n">output_file_path</span></div>
<div class="viewcode-block" id="get_groups_at_a_level">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.get_groups_at_a_level">[docs]</a>
<span class="k">def</span> <span class="nf">get_groups_at_a_level</span><span class="p">(</span><span class="n">file</span><span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">groups</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">def</span> <span class="nf">node_selector</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">obj</span><span class="p">):</span>
<span class="k">if</span> <span class="n">name</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="n">level</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">groups</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="n">file</span><span class="o">.</span><span class="n">visititems</span><span class="p">(</span><span class="n">node_selector</span><span class="p">)</span>
<span class="c1">#file.visititems()</span>
<span class="k">return</span> <span class="n">groups</span></div>
<div class="viewcode-block" id="read_mtable_as_dataframe">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_ops.read_mtable_as_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">read_mtable_as_dataframe</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Reconstruct a MATLAB Table encoded in a .h5 file as a Pandas DataFrame.</span>
<span class="sd"> This function reads a .h5 file containing a MATLAB Table and reconstructs it as a Pandas DataFrame. </span>
<span class="sd"> The input .h5 file contains one group per row of the MATLAB Table. Each group stores the table&#39;s </span>
<span class="sd"> dataset-like variables as Datasets, while categorical and numerical variables are represented as </span>
<span class="sd"> attributes of the respective group.</span>
<span class="sd"> To ensure homogeneity of data columns, the DataFrame is constructed column-wise.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> filename : str</span>
<span class="sd"> The name of the .h5 file. This may include the file&#39;s location and path information.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pd.DataFrame</span>
<span class="sd"> The MATLAB Table reconstructed as a Pandas DataFrame.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Construct the DataFrame by filling in entries column-wise, so that each column holds homogeneous data.</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="c1"># Define group&#39;s attributes and datasets. This should hold</span>
<span class="c1"># for all groups. TODO: implement verification and noncompliance error if needed.</span>
<span class="n">group_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">group_attrs</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="c1"># </span>
<span class="n">column_attr_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">::]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">]</span>
<span class="n">column_attr_names_idx</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">4</span><span class="p">:(</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">))])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">]</span>
<span class="n">group_datasets</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;DS_EMPTY&#39;</span> <span class="ow">in</span> <span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">keys</span><span class="p">()</span> <span class="k">else</span> <span class="p">[]</span>
<span class="c1">#</span>
<span class="n">column_dataset_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]][</span><span class="n">item</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;column_name&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_datasets</span><span class="p">]</span>
<span class="n">column_dataset_names_idx</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">2</span><span class="p">:])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_datasets</span><span class="p">]</span>
<span class="c1"># Define data_frame as group_attrs + group_datasets</span>
<span class="c1">#pd_series_index = group_attrs + group_datasets</span>
<span class="n">pd_series_index</span> <span class="o">=</span> <span class="n">column_attr_names</span> <span class="o">+</span> <span class="n">column_dataset_names</span>
<span class="n">output_dataframe</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">pd_series_index</span><span class="p">,</span><span class="n">index</span><span class="o">=</span><span class="n">group_list</span><span class="p">)</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">meas_prop</span> <span class="ow">in</span> <span class="n">group_attrs</span> <span class="o">+</span> <span class="n">group_datasets</span><span class="p">:</span>
<span class="k">if</span> <span class="n">meas_prop</span> <span class="ow">in</span> <span class="n">group_attrs</span><span class="p">:</span>
<span class="n">column_label</span> <span class="o">=</span> <span class="n">meas_prop</span><span class="p">[</span><span class="n">meas_prop</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">:]</span>
<span class="c1"># Create numerical or categorical column from group&#39;s attributes</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_key</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">meas_prop</span><span class="p">][()][</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">group_key</span> <span class="ow">in</span> <span class="n">group_list</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Create dataset column from group&#39;s datasets</span>
<span class="n">column_label</span> <span class="o">=</span> <span class="n">file</span><span class="p">[</span><span class="n">group_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">meas_prop</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;column_name&#39;</span><span class="p">]</span>
<span class="c1">#tmp_col = [file[group_key + &#39;/&#39; + meas_prop][()][0] for group_key in group_list]</span>
<span class="n">tmp_col</span> <span class="o">=</span> <span class="p">[</span><span class="n">file</span><span class="p">[</span><span class="n">group_key</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="n">meas_prop</span><span class="p">][()]</span> <span class="k">for</span> <span class="n">group_key</span> <span class="ow">in</span> <span class="n">group_list</span><span class="p">]</span>
<span class="n">output_dataframe</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span><span class="n">column_label</span><span class="p">]</span> <span class="o">=</span> <span class="n">tmp_col</span>
<span class="k">return</span> <span class="n">output_dataframe</span></div>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">)</span> <span class="o">&lt;</span> <span class="mi">5</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Usage: python hdf5_ops.py serialize &lt;path/to/target_file.hdf5&gt; &lt;folder_depth : int = 2&gt; &lt;format=json|yaml&gt;&quot;</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;serialize&#39;</span><span class="p">:</span>
<span class="n">input_hdf5_file</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
<span class="n">folder_depth</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">])</span>
<span class="n">file_format</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Call the serialize_metadata function and capture the output path</span>
<span class="n">path_to_file</span> <span class="o">=</span> <span class="n">serialize_metadata</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span>
<span class="n">folder_depth</span> <span class="o">=</span> <span class="n">folder_depth</span><span class="p">,</span>
<span class="n">output_format</span><span class="o">=</span><span class="n">file_format</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Metadata serialized to </span><span class="si">{</span><span class="n">path_to_file</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;An error occurred during serialization: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="c1">#run(sys.argv[2])</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,180 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_vis &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-pipelines.data_integration">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-utils.g5505_utils">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_vis</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_vis</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">root_dir</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">from</span> <span class="nn">plotly.subplots</span> <span class="kn">import</span> <span class="n">make_subplots</span>
<span class="kn">import</span> <span class="nn">plotly.graph_objects</span> <span class="k">as</span> <span class="nn">go</span>
<span class="kn">import</span> <span class="nn">plotly.express</span> <span class="k">as</span> <span class="nn">px</span>
<span class="c1">#import plotly.io as pio</span>
<span class="kn">from</span> <span class="nn">src.hdf5_ops</span> <span class="kn">import</span> <span class="n">get_parent_child_relationships</span>
<div class="viewcode-block" id="display_group_hierarchy_on_a_treemap">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_vis.display_group_hierarchy_on_a_treemap">[docs]</a>
<span class="k">def</span> <span class="nf">display_group_hierarchy_on_a_treemap</span><span class="p">(</span><span class="n">filename</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> filename (str): hdf5 file&#39;s filename&quot;&quot;&quot;</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="n">nodes</span><span class="p">,</span> <span class="n">parents</span><span class="p">,</span> <span class="n">values</span> <span class="o">=</span> <span class="n">get_parent_child_relationships</span><span class="p">(</span><span class="n">file</span><span class="p">)</span>
<span class="n">metadata_list</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">metadata_dict</span><span class="o">=</span><span class="p">{}</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="c1">#if &#39;metadata&#39; in key:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">file</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">key</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span> <span class="c1"># Check if the attribute is a string</span>
<span class="n">metadata_key</span> <span class="o">=</span> <span class="n">key</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:]</span>
<span class="n">metadata_value</span> <span class="o">=</span> <span class="n">file</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="n">metadata_dict</span><span class="p">[</span><span class="n">metadata_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">metadata_value</span>
<span class="n">metadata_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">metadata_key</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="n">metadata_value</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="c1">#metadata_dict[key[key.find(&#39;_&#39;)+1::]]= file.attrs[key]</span>
<span class="c1">#metadata_list.append(key[key.find(&#39;_&#39;)+1::]+&#39;:&#39;+file.attrs[key])</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="s1">&#39;&lt;br&gt;&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">&#39;&lt;br&gt;&#39;</span><span class="p">]</span> <span class="o">+</span> <span class="n">metadata_list</span><span class="p">)</span>
<span class="n">customdata_series</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">nodes</span><span class="p">)</span>
<span class="n">customdata_series</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">metadata</span>
<span class="n">fig</span> <span class="o">=</span> <span class="n">make_subplots</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">specs</span><span class="o">=</span><span class="p">[[{</span><span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="s2">&quot;domain&quot;</span><span class="p">}]],)</span>
<span class="n">fig</span><span class="o">.</span><span class="n">add_trace</span><span class="p">(</span><span class="n">go</span><span class="o">.</span><span class="n">Treemap</span><span class="p">(</span>
<span class="n">labels</span><span class="o">=</span><span class="n">nodes</span><span class="p">,</span> <span class="c1">#formating_df[&#39;formated_names&#39;][nodes],</span>
<span class="n">parents</span><span class="o">=</span><span class="n">parents</span><span class="p">,</span><span class="c1">#formating_df[&#39;formated_names&#39;][parents],</span>
<span class="n">values</span><span class="o">=</span><span class="n">values</span><span class="p">,</span>
<span class="n">branchvalues</span><span class="o">=</span><span class="s1">&#39;remainder&#39;</span><span class="p">,</span>
<span class="n">customdata</span><span class="o">=</span> <span class="n">customdata_series</span><span class="p">,</span>
<span class="c1">#marker=dict(</span>
<span class="c1"># colors=df_all_trees[&#39;color&#39;],</span>
<span class="c1"># colorscale=&#39;RdBu&#39;,</span>
<span class="c1"># cmid=average_score),</span>
<span class="c1">#hovertemplate=&#39;&lt;b&gt;%{label} &lt;/b&gt; &lt;br&gt; Number of files: %{value}&lt;br&gt; Success rate: %{color:.2f}&#39;,</span>
<span class="n">hovertemplate</span><span class="o">=</span><span class="s1">&#39;&lt;b&gt;%</span><span class="si">{label}</span><span class="s1"> &lt;/b&gt; &lt;br&gt; Count: %</span><span class="si">{value}</span><span class="s1"> &lt;br&gt; Path: %</span><span class="si">{customdata}</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="n">name</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">,</span>
<span class="n">root_color</span><span class="o">=</span><span class="s2">&quot;lightgrey&quot;</span>
<span class="p">))</span>
<span class="n">fig</span><span class="o">.</span><span class="n">update_layout</span><span class="p">(</span><span class="n">width</span> <span class="o">=</span> <span class="mi">800</span><span class="p">,</span> <span class="n">height</span><span class="o">=</span> <span class="mi">600</span><span class="p">,</span> <span class="n">margin</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">t</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">l</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span> <span class="n">r</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span> <span class="n">b</span><span class="o">=</span><span class="mi">25</span><span class="p">))</span>
<span class="n">fig</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
<span class="n">file_name</span><span class="p">,</span> <span class="n">file_ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="n">fig</span><span class="o">.</span><span class="n">write_html</span><span class="p">(</span><span class="n">file_name</span> <span class="o">+</span> <span class="s2">&quot;.html&quot;</span><span class="p">)</span></div>
<span class="c1">#pio.write_image(fig,file_name + &quot;.png&quot;,width=800,height=600,format=&#39;png&#39;)</span>
<span class="c1">#</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,513 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.hdf5_writer &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.hdf5_writer</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.hdf5_writer</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">root_dir</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">utils.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">instruments.readers.filereader_registry</span> <span class="k">as</span> <span class="nn">filereader_registry</span>
<span class="k">def</span> <span class="nf">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="n">group_name</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transfers data from a file_dict to an HDF5 file.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> h5file : h5py.File</span>
<span class="sd"> HDF5 file object where the data will be written.</span>
<span class="sd"> group_name : str</span>
<span class="sd"> Name of the HDF5 group where data will be stored.</span>
<span class="sd"> file_dict : dict</span>
<span class="sd"> Dictionary containing file data to be transferred. Required structure:</span>
<span class="sd"> {</span>
<span class="sd"> &#39;name&#39;: str,</span>
<span class="sd"> &#39;attributes_dict&#39;: dict,</span>
<span class="sd"> &#39;datasets&#39;: [</span>
<span class="sd"> {</span>
<span class="sd"> &#39;name&#39;: str,</span>
<span class="sd"> &#39;data&#39;: array-like,</span>
<span class="sd"> &#39;shape&#39;: tuple,</span>
<span class="sd"> &#39;attributes&#39;: dict (optional)</span>
<span class="sd"> },</span>
<span class="sd"> ...</span>
<span class="sd"> ]</span>
<span class="sd"> }</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">file_dict</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Create the group and add its attributes</span>
<span class="n">filename</span> <span class="o">=</span> <span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">]</span>
<span class="n">group</span> <span class="o">=</span> <span class="n">h5file</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">filename</span><span class="p">)</span>
<span class="c1"># Add group attributes </span>
<span class="n">group</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;attributes_dict&#39;</span><span class="p">])</span>
<span class="c1"># Add datasets to the just created group</span>
<span class="k">for</span> <span class="n">dataset</span> <span class="ow">in</span> <span class="n">file_dict</span><span class="p">[</span><span class="s1">&#39;datasets&#39;</span><span class="p">]:</span>
<span class="n">dataset_obj</span> <span class="o">=</span> <span class="n">group</span><span class="o">.</span><span class="n">create_dataset</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">],</span>
<span class="n">data</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;data&#39;</span><span class="p">],</span>
<span class="n">shape</span><span class="o">=</span><span class="n">dataset</span><span class="p">[</span><span class="s1">&#39;shape&#39;</span><span class="p">]</span>
<span class="p">)</span>
<span class="c1"># Add dataset&#39;s attributes </span>
<span class="n">attributes</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;attributes&#39;</span><span class="p">,</span> <span class="p">{})</span>
<span class="n">dataset_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">attributes</span><span class="p">)</span>
<span class="n">group</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;last_update_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="n">stdout</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;Completed transfer for /</span><span class="si">{</span><span class="n">group_name</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">inst</span><span class="p">:</span>
<span class="n">stdout</span> <span class="o">=</span> <span class="n">inst</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">&#39;Failed to transfer data into HDF5: </span><span class="si">%s</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">inst</span><span class="p">)</span>
<span class="k">return</span> <span class="n">stdout</span>
<span class="k">def</span> <span class="nf">__copy_file_in_group</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">dest_file_obj</span> <span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">,</span> <span class="n">dest_group_name</span><span class="p">,</span> <span class="n">work_with_copy</span> <span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">):</span>
<span class="c1"># Create copy of original file to avoid possible file corruption and work with it.</span>
<span class="k">if</span> <span class="n">work_with_copy</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">make_file_copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">source_file_path</span>
<span class="c1"># Open backup h5 file and copy complete filesystem directory onto a group in h5file</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">,</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">src_file</span><span class="p">:</span>
<span class="n">dest_file_obj</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">source</span><span class="o">=</span> <span class="n">src_file</span><span class="p">[</span><span class="s1">&#39;/&#39;</span><span class="p">],</span> <span class="n">dest</span><span class="o">=</span> <span class="n">dest_group_name</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;tmp_files&#39;</span> <span class="ow">in</span> <span class="n">tmp_file_path</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_file_path</span><span class="p">)</span>
<span class="n">stdout</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;Completed transfer for /</span><span class="si">{</span><span class="n">dest_group_name</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">return</span> <span class="n">stdout</span>
<div class="viewcode-block" id="create_hdf5_file_from_filesystem_path">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_writer.create_hdf5_file_from_filesystem_path">[docs]</a>
<span class="k">def</span> <span class="nf">create_hdf5_file_from_filesystem_path</span><span class="p">(</span><span class="n">path_to_input_directory</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">path_to_filenames_dict</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">select_dir_keywords</span> <span class="p">:</span> <span class="nb">list</span> <span class="o">=</span> <span class="p">[],</span>
<span class="n">root_metadata_dict</span> <span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="p">{},</span> <span class="n">mode</span> <span class="o">=</span> <span class="s1">&#39;w&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates an .h5 file with name &quot;output_filename&quot; that preserves the directory tree (or folder structure)</span>
<span class="sd"> of a given filesystem path.</span>
<span class="sd"> The data integration capabilities are limited by our file reader, which can only access data from a list of</span>
<span class="sd"> admissible file formats. These, however, can be extended. Directories are groups in the resulting HDF5 file.</span>
<span class="sd"> Files are formatted as composite objects consisting of a group, file, and attributes.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> output_filename : str</span>
<span class="sd"> Name of the output HDF5 file.</span>
<span class="sd"> path_to_input_directory : str</span>
<span class="sd"> Path to root directory, specified with forward slashes, e.g., path/to/root.</span>
<span class="sd"> path_to_filenames_dict : dict, optional</span>
<span class="sd"> A pre-processed dictionary where keys are directory paths on the input directory&#39;s tree and values are lists of files.</span>
<span class="sd"> If provided, &#39;path_to_input_directory&#39; is not used to build this dictionary.</span>
<span class="sd"> select_dir_keywords : list</span>
<span class="sd"> List of string elements to consider or select only directory paths that contain</span>
<span class="sd"> a word in &#39;select_dir_keywords&#39;. When empty, all directory paths are considered</span>
<span class="sd"> to be included in the HDF5 file group hierarchy.</span>
<span class="sd"> root_metadata_dict : dict</span>
<span class="sd"> Metadata to include at the root level of the HDF5 file.</span>
<span class="sd"> mode : str</span>
<span class="sd"> &#39;w&#39; create File, truncate if it exists, or &#39;r+&#39; read/write, File must exist. By default, mode = &quot;w&quot;.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> output_filename : str</span>
<span class="sd"> Path to the created HDF5 file.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">mode</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;w&#39;</span><span class="p">,</span><span class="s1">&#39;r+&#39;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Parameter mode must take values in [&quot;w&quot;,&quot;r+&quot;]&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;/&#39;</span> <span class="ow">in</span> <span class="n">path_to_input_directory</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;path_to_input_directory needs to be specified using forward slashes &quot;/&quot;.&#39;</span> <span class="p">)</span>
<span class="c1">#path_to_output_directory = os.path.join(path_to_input_directory,&#39;..&#39;)</span>
<span class="n">path_to_input_directory</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">normpath</span><span class="p">(</span><span class="n">path_to_input_directory</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">keyword</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">select_dir_keywords</span><span class="p">):</span>
<span class="n">select_dir_keywords</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">keyword</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">,</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">path_to_filenames_dict</span><span class="p">:</span>
<span class="c1"># On dry_run=True, returns path to files dictionary of the output directory without making an actual copy of the input directory. </span>
<span class="c1"># Therefore, there won&#39;t be a copying conflict when the input and output directories are set to the same path</span>
<span class="n">path_to_filenames_dict</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">copy_directory_with_contraints</span><span class="p">(</span><span class="n">input_dir_path</span><span class="o">=</span><span class="n">path_to_input_directory</span><span class="p">,</span>
<span class="n">output_dir_path</span><span class="o">=</span><span class="n">path_to_input_directory</span><span class="p">,</span>
<span class="n">dry_run</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Set input_directory as copied input directory</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">path_to_input_directory</span>
<span class="n">path_to_output_file</span> <span class="o">=</span> <span class="n">path_to_input_directory</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">sep</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;.h5&#39;</span>
<span class="n">start_message</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">[Start] Data integration :</span><span class="se">\n</span><span class="s1">Source: </span><span class="si">{</span><span class="n">path_to_input_directory</span><span class="si">}</span><span class="se">\n</span><span class="s1">Destination: </span><span class="si">{</span><span class="n">path_to_output_file</span><span class="si">}</span><span class="se">\n</span><span class="s1">&#39;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">start_message</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">start_message</span><span class="p">)</span>
<span class="c1"># Check if the .h5 file already exists</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path_to_output_file</span><span class="p">)</span> <span class="ow">and</span> <span class="n">mode</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">&#39;w&#39;</span><span class="p">]:</span>
<span class="n">message</span> <span class="o">=</span> <span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;[Notice] The file &#39;</span><span class="si">{</span><span class="n">path_to_output_file</span><span class="si">}</span><span class="s2">&#39; already exists and will not be overwritten.</span><span class="se">\n</span><span class="s2">&quot;</span>
<span class="s2">&quot;If you wish to replace it, please delete the existing file first and rerun the program.&quot;</span>
<span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">message</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">message</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">path_to_output_file</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span> <span class="n">track_order</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="k">as</span> <span class="n">h5file</span><span class="p">:</span>
<span class="n">number_of_dirs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">path_to_filenames_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="n">dir_number</span> <span class="o">=</span> <span class="mi">1</span>
<span class="k">for</span> <span class="n">dirpath</span><span class="p">,</span> <span class="n">filtered_filenames_list</span> <span class="ow">in</span> <span class="n">path_to_filenames_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="c1"># Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict. </span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">filtered_filenames_list</span><span class="p">:</span>
<span class="k">continue</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="n">dirpath</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="n">group_name</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">root_dir</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">,</span><span class="s1">&#39;/&#39;</span><span class="p">)</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="c1"># Flatten group name to one level</span>
<span class="k">if</span> <span class="n">select_dir_keywords</span><span class="p">:</span>
<span class="n">offset</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="n">i</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">sep</span><span class="p">))</span> <span class="k">if</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">dirpath</span> <span class="k">else</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">select_dir_keywords</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">offset</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">tmp_list</span> <span class="o">=</span> <span class="n">group_name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">tmp_list</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">offset</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span>
<span class="n">group_name</span> <span class="o">=</span> <span class="s1">&#39;/&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">tmp_list</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">offset</span><span class="o">+</span><span class="mi">1</span><span class="p">)])</span>
<span class="c1"># Create group called &quot;group_name&quot;. Hierarchy of nested groups can be implicitly defined by the forward slashes</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">group_name</span> <span class="ow">in</span> <span class="n">h5file</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">group_name</span><span class="p">)</span>
<span class="n">h5file</span><span class="p">[</span><span class="n">group_name</span><span class="p">]</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="s1">&#39;creation_date&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">created_at</span><span class="p">()</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="c1">#h5file[group_name].attrs.create(name=&#39;filtered_file_list&#39;,data=convert_string_to_bytes(filtered_filename_list))</span>
<span class="c1">#h5file[group_name].attrs.create(name=&#39;file_list&#39;,data=convert_string_to_bytes(filenames_list))</span>
<span class="c1">#else: </span>
<span class="c1">#print(group_name,&#39; was already created.&#39;) </span>
<span class="n">instFoldermsgStart</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;Starting data transfer from instFolder: </span><span class="si">{</span><span class="n">group_name</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="nb">print</span><span class="p">(</span><span class="n">instFoldermsgStart</span><span class="p">)</span>
<span class="k">for</span> <span class="n">filenumber</span><span class="p">,</span> <span class="n">filename</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">filtered_filenames_list</span><span class="p">):</span>
<span class="c1">#file_ext = os.path.splitext(filename)[1]</span>
<span class="c1">#try: </span>
<span class="c1"># hdf5 path to filename group </span>
<span class="n">dest_group_name</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">group_name</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;h5&#39;</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
<span class="c1">#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))</span>
<span class="c1">#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="n">filereader_registry</span><span class="o">.</span><span class="n">select_file_reader</span><span class="p">(</span><span class="n">dest_group_name</span><span class="p">)(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span><span class="n">filename</span><span class="p">))</span>
<span class="n">stdout</span> <span class="o">=</span> <span class="n">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="n">group_name</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">source_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span><span class="n">filename</span><span class="p">)</span>
<span class="n">dest_file_obj</span> <span class="o">=</span> <span class="n">h5file</span>
<span class="c1">#group_name +&#39;/&#39;+filename</span>
<span class="c1">#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)</span>
<span class="c1">#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)</span>
<span class="n">stdout</span> <span class="o">=</span> <span class="n">__copy_file_in_group</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">dest_file_obj</span><span class="p">,</span> <span class="n">dest_group_name</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
<span class="c1"># Update the progress bar and log the end message</span>
<span class="n">instFoldermsdEnd</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">Completed data transfer for instFolder: </span><span class="si">{</span><span class="n">group_name</span><span class="si">}</span><span class="se">\n</span><span class="s1">&#39;</span>
<span class="c1"># Report progress with the end message and log it</span>
<span class="n">utils</span><span class="o">.</span><span class="n">progressBar</span><span class="p">(</span><span class="n">dir_number</span><span class="p">,</span> <span class="n">number_of_dirs</span><span class="p">,</span> <span class="n">instFoldermsdEnd</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="n">instFoldermsdEnd</span> <span class="p">)</span>
<span class="n">dir_number</span> <span class="o">=</span> <span class="n">dir_number</span> <span class="o">+</span> <span class="mi">1</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;[End] Data integration&#39;</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">&#39;[End] Data integration&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="o">&gt;</span><span class="mi">0</span><span class="p">:</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">root_metadata_dict</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="c1">#if key in h5file.attrs:</span>
<span class="c1"># del h5file.attrs[key]</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="c1">#annotate_root_dir(output_filename,root_metadata_dict) </span>
<span class="c1">#output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)</span>
<span class="k">return</span> <span class="n">path_to_output_file</span> <span class="c1">#, output_yml_filename_path</span></div>
<div class="viewcode-block" id="create_hdf5_file_from_dataframe">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_writer.create_hdf5_file_from_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">create_hdf5_file_from_dataframe</span><span class="p">(</span><span class="n">ofilename</span><span class="p">,</span> <span class="n">input_data</span><span class="p">,</span> <span class="n">group_by_funcs</span><span class="p">:</span> <span class="nb">list</span><span class="p">,</span> <span class="n">approach</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">extract_attrs_func</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates an HDF5 file with hierarchical groups based on the specified grouping functions or columns.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> -----------</span>
<span class="sd"> ofilename (str): Path for the output HDF5 file.</span>
<span class="sd"> input_data (pd.DataFrame or str): Input data as a DataFrame or a valid file system path.</span>
<span class="sd"> group_by_funcs (list): List of callables or column names to define hierarchical grouping.</span>
<span class="sd"> approach (str): Specifies the approach (&#39;top-down&#39; or &#39;bottom-up&#39;) for creating the HDF5 file. Currently not used by the function body.</span>
<span class="sd"> extract_attrs_func (callable, optional): Function to extract additional attributes for HDF5 groups.</span>
<span class="sd"> Returns:</span>
<span class="sd"> --------</span>
<span class="sd"> ofilename (str): Path of the HDF5 file that was written.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Check whether input_data is a valid file-system path or a DataFrame</span>
<span class="n">is_valid_path</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="kc">False</span>
<span class="k">if</span> <span class="n">is_valid_path</span><span class="p">(</span><span class="n">input_data</span><span class="p">):</span>
<span class="c1"># If input_data is a file-system path, create a DataFrame with file info</span>
<span class="n">file_list</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">input_data</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">file_list</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;filename&#39;</span><span class="p">])</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">augment_with_filetype</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="c1"># Add filetype information if needed</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">input_data</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span>
<span class="c1"># If input_data is a DataFrame, make a copy</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">input_data</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;input_data must be either a valid file-system path or a DataFrame.&quot;</span><span class="p">)</span>
<span class="c1"># Generate grouping columns based on group_by_funcs</span>
<span class="k">if</span> <span class="n">utils</span><span class="o">.</span><span class="n">is_callable_list</span><span class="p">(</span><span class="n">group_by_funcs</span><span class="p">):</span>
<span class="n">grouping_cols</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">func</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">group_by_funcs</span><span class="p">):</span>
<span class="n">col_name</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">&#39;level_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s1">_groups&#39;</span>
<span class="n">grouping_cols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">col_name</span><span class="p">)</span>
<span class="n">df</span><span class="p">[</span><span class="n">col_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">utils</span><span class="o">.</span><span class="n">is_str_list</span><span class="p">(</span><span class="n">group_by_funcs</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">all</span><span class="p">([</span><span class="n">item</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">group_by_funcs</span><span class="p">]):</span>
<span class="n">grouping_cols</span> <span class="o">=</span> <span class="n">group_by_funcs</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;group_by_funcs&#39; must be a list of callables or valid column names in the DataFrame.&quot;</span><span class="p">)</span>
<span class="c1"># Generate group paths</span>
<span class="n">df</span><span class="p">[</span><span class="s1">&#39;group_path&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;/&#39;</span> <span class="o">+</span> <span class="s1">&#39;/&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">row</span><span class="p">)</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">df</span><span class="p">[</span><span class="n">grouping_cols</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)]</span>
<span class="c1"># Open the HDF5 file in write mode</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">ofilename</span><span class="p">,</span> <span class="s1">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
<span class="k">for</span> <span class="n">group_path</span> <span class="ow">in</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;group_path&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">unique</span><span class="p">():</span>
<span class="c1"># Create groups in HDF5</span>
<span class="n">group</span> <span class="o">=</span> <span class="n">file</span><span class="o">.</span><span class="n">create_group</span><span class="p">(</span><span class="n">group_path</span><span class="p">)</span>
<span class="c1"># Filter the DataFrame for the current group</span>
<span class="n">datatable</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;group_path&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="n">group_path</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="c1"># Drop grouping columns and the generated &#39;group_path&#39;</span>
<span class="n">datatable</span> <span class="o">=</span> <span class="n">datatable</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">grouping_cols</span> <span class="o">+</span> <span class="p">[</span><span class="s1">&#39;group_path&#39;</span><span class="p">])</span>
<span class="c1"># Add datasets to groups if data exists</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">datatable</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_dataframe_to_np_structured_array</span><span class="p">(</span><span class="n">datatable</span><span class="p">)</span>
<span class="n">group</span><span class="o">.</span><span class="n">create_dataset</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;data_table&#39;</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="n">dataset</span><span class="p">)</span>
<span class="c1"># Add attributes if extract_attrs_func is provided</span>
<span class="k">if</span> <span class="n">extract_attrs_func</span><span class="p">:</span>
<span class="n">attrs</span> <span class="o">=</span> <span class="n">extract_attrs_func</span><span class="p">(</span><span class="n">datatable</span><span class="p">)</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">attrs</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="n">group</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="c1"># Save metadata about depth of hierarchy</span>
<span class="n">file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s1">&#39;depth&#39;</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">grouping_cols</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;HDF5 file created successfully at </span><span class="si">{</span><span class="n">ofilename</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">ofilename</span></div>
<div class="viewcode-block" id="save_processed_dataframe_to_hdf5">
<a class="viewcode-back" href="../../modules/src.html#src.hdf5_writer.save_processed_dataframe_to_hdf5">[docs]</a>
<span class="k">def</span> <span class="nf">save_processed_dataframe_to_hdf5</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">annotator</span><span class="p">,</span> <span class="n">output_filename</span><span class="p">):</span> <span class="c1"># src_hdf5_path, script_date, script_name):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Save processed dataframe columns with annotations to an HDF5 file.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> df (pd.DataFrame): DataFrame containing processed time series.</span>
<span class="sd"> annotator (object): Annotator object with a get_metadata() method.</span>
<span class="sd"> output_filename (str): Path to the output HDF5 file; opened in append mode if it exists, created otherwise.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Convert datetime columns to string</span>
<span class="n">datetime_cols</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;datetime64&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">columns</span>
<span class="k">if</span> <span class="nb">list</span><span class="p">(</span><span class="n">datetime_cols</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="n">datetime_cols</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">datetime_cols</span><span class="p">]</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span>
<span class="c1"># Convert dataframe to structured array</span>
<span class="n">icad_data_table</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_dataframe_to_np_structured_array</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="c1"># Get metadata</span>
<span class="n">metadata_dict</span> <span class="o">=</span> <span class="n">annotator</span><span class="o">.</span><span class="n">get_metadata</span><span class="p">()</span>
<span class="c1"># Prepare project level attributes to be added at the root level</span>
<span class="n">project_level_attributes</span> <span class="o">=</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;project&#39;</span><span class="p">]</span>
<span class="c1"># Prepare high-level attributes</span>
<span class="n">high_level_attributes</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;parent_files&#39;</span><span class="p">:</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;parent_files&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;sample&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;environment&#39;</span><span class="p">],</span>
<span class="o">**</span><span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;instruments&#39;</span><span class="p">]</span>
<span class="p">}</span>
<span class="c1"># Prepare data level attributes</span>
<span class="n">data_level_attributes</span> <span class="o">=</span> <span class="n">metadata_dict</span><span class="p">[</span><span class="s1">&#39;metadata&#39;</span><span class="p">][</span><span class="s1">&#39;datasets&#39;</span><span class="p">]</span>
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">data_level_attributes</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span><span class="nb">dict</span><span class="p">):</span>
<span class="n">data_level_attributes</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="c1"># Prepare file dictionary</span>
<span class="n">file_dict</span> <span class="o">=</span> <span class="p">{</span>
<span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="n">project_level_attributes</span><span class="p">[</span><span class="s1">&#39;processing_file&#39;</span><span class="p">],</span>
<span class="s1">&#39;attributes_dict&#39;</span><span class="p">:</span> <span class="n">high_level_attributes</span><span class="p">,</span>
<span class="s1">&#39;datasets&#39;</span><span class="p">:</span> <span class="p">[{</span>
<span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="s2">&quot;data_table&quot;</span><span class="p">,</span>
<span class="s1">&#39;data&#39;</span><span class="p">:</span> <span class="n">icad_data_table</span><span class="p">,</span>
<span class="s1">&#39;shape&#39;</span><span class="p">:</span> <span class="n">icad_data_table</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span>
<span class="s1">&#39;attributes&#39;</span><span class="p">:</span> <span class="n">data_level_attributes</span>
<span class="p">}]</span>
<span class="p">}</span>
<span class="c1"># Check if the file exists</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">output_filename</span><span class="p">):</span>
<span class="n">mode</span> <span class="o">=</span> <span class="s2">&quot;a&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;File </span><span class="si">{</span><span class="n">output_filename</span><span class="si">}</span><span class="s2"> exists. Opening in append mode.&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">mode</span> <span class="o">=</span> <span class="s2">&quot;w&quot;</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;File </span><span class="si">{</span><span class="n">output_filename</span><span class="si">}</span><span class="s2"> does not exist. Creating a new file.&quot;</span><span class="p">)</span>
<span class="c1"># Write to HDF5</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">output_filename</span><span class="p">,</span> <span class="n">mode</span><span class="p">)</span> <span class="k">as</span> <span class="n">h5file</span><span class="p">:</span>
<span class="c1"># Add project level attributes at the root/top level</span>
<span class="n">h5file</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">project_level_attributes</span><span class="p">)</span>
<span class="n">__transfer_file_dict_to_hdf5</span><span class="p">(</span><span class="n">h5file</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">,</span> <span class="n">file_dict</span><span class="p">)</span></div>
<span class="c1">#if __name__ == &#39;__main__&#39;:</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,545 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>src.metadata_review_lib &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_lib">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.metadata_review_lib">Data annotation and review</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.g5505_file_reader">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">src.metadata_review_lib</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for src.metadata_review_lib</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">root_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">root_dir</span><span class="p">)</span>
<span class="kn">import</span> <span class="nn">subprocess</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">yaml</span>
<span class="kn">import</span> <span class="nn">src.g5505_utils</span> <span class="k">as</span> <span class="nn">utils</span>
<span class="kn">import</span> <span class="nn">src.hdf5_vis</span> <span class="k">as</span> <span class="nn">hdf5_vis</span>
<span class="kn">import</span> <span class="nn">src.hdf5_lib</span> <span class="k">as</span> <span class="nn">hdf5_lib</span>
<span class="kn">import</span> <span class="nn">src.git_ops</span> <span class="k">as</span> <span class="nn">git_ops</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="n">YAML_EXT</span> <span class="o">=</span> <span class="s2">&quot;.yaml&quot;</span>
<span class="n">TXT_EXT</span> <span class="o">=</span> <span class="s2">&quot;.txt&quot;</span>
<div class="viewcode-block" id="get_review_status">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.get_review_status">[docs]</a>
<span class="k">def</span> <span class="nf">get_review_status</span><span class="p">(</span><span class="n">filename_path</span><span class="p">):</span>
<span class="n">filename_path_tail</span><span class="p">,</span> <span class="n">filename_path_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">filename_path</span><span class="p">)</span>
<span class="n">filename</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">filename_path_head</span><span class="p">)</span>
<span class="c1"># TODO: </span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="s2">&quot;review/&quot;</span><span class="p">,</span><span class="n">filename</span><span class="o">+</span><span class="s2">&quot;-review_status&quot;</span><span class="o">+</span><span class="n">TXT_EXT</span><span class="p">),</span><span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">workflow_steps</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">f</span><span class="p">:</span>
<span class="n">workflow_steps</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
<span class="k">return</span> <span class="n">workflow_steps</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span></div>
<div class="viewcode-block" id="first_initialize_metadata_review">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.first_initialize_metadata_review">[docs]</a>
<span class="k">def</span> <span class="nf">first_initialize_metadata_review</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">,</span> <span class="n">reviewer_attrs</span><span class="p">,</span> <span class="n">restart</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> First: Initialize review branch with review folder with a copy of yaml representation of</span>
<span class="sd"> hdf5 file under review and by creating a txt file with the state of the review process, e.g., under review.</span>
<span class="sd"> </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">initials</span> <span class="o">=</span> <span class="n">reviewer_attrs</span><span class="p">[</span><span class="s1">&#39;initials&#39;</span><span class="p">]</span>
<span class="c1">#branch_name = &#39;-&#39;.join([reviewer_attrs[&#39;type&#39;],&#39;review_&#39;,initials])</span>
<span class="n">branch_name</span> <span class="o">=</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">&#39;review&#39;</span><span class="p">,</span><span class="n">initials</span><span class="p">])</span>
<span class="n">hdf5_file_path_tail</span><span class="p">,</span> <span class="n">filename_path_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">hdf5_file_path</span><span class="p">)</span>
<span class="n">filename</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">filename_path_head</span><span class="p">)</span>
<span class="c1"># Check file_path points to h5 file</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;h5&#39;</span> <span class="ow">in</span> <span class="n">ext</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;filename_path needs to point to an h5 file.&quot;</span><span class="p">)</span>
<span class="c1"># Verify if yaml snapshot of input h5 file exists </span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">hdf5_file_path_tail</span><span class="p">,</span><span class="n">filename</span><span class="o">+</span><span class="n">YAML_EXT</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;metadata review cannot be initialized. The associated .yaml file under review was not found. Run take_yml_snapshot_of_hdf5_file(filename_path) &quot;</span><span class="p">)</span>
<span class="c1"># Initialize metadata review workflow</span>
<span class="c1"># print(&quot;Create branch metadata-review-by-&quot;+initials+&quot;\n&quot;)</span>
<span class="c1">#checkout_review_branch(branch_name)</span>
<span class="c1"># Check you are working at the right branch </span>
<span class="n">curr_branch</span> <span class="o">=</span> <span class="n">git_ops</span><span class="o">.</span><span class="n">show_current_branch</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">branch_name</span> <span class="ow">in</span> <span class="n">curr_branch</span><span class="o">.</span><span class="n">stdout</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Branch &quot;</span><span class="o">+</span><span class="n">branch_name</span><span class="o">+</span><span class="s2">&quot; was not found. </span><span class="se">\n</span><span class="s2">Please open a Git Bash Terminal, and follow the below instructions: </span><span class="se">\n</span><span class="s2">1. Change directory to your project&#39;s directory. </span><span class="se">\n</span><span class="s2">2. Excecute the command: git checkout &quot;</span><span class="o">+</span><span class="n">branch_name</span><span class="p">)</span>
<span class="c1"># Check if review file already exists and then check if it is still untracked</span>
<span class="n">review_yaml_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="s2">&quot;review/&quot;</span><span class="p">,</span><span class="n">filename</span><span class="o">+</span><span class="n">YAML_EXT</span><span class="p">)</span>
<span class="n">review_yaml_file_path_tail</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">)</span>
<span class="n">review_status_yaml_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">review_yaml_file_path_tail</span><span class="o">+</span><span class="s2">&quot;-review_status&quot;</span><span class="o">+</span><span class="s2">&quot;.txt&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">)</span> <span class="ow">or</span> <span class="n">restart</span><span class="p">:</span>
<span class="n">review_yaml_file_path</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">make_file_copy</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">hdf5_file_path_tail</span><span class="p">,</span><span class="n">filename</span><span class="o">+</span><span class="n">YAML_EXT</span><span class="p">),</span> <span class="s1">&#39;review&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">restart</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;metadata review has been reinitialized. The review files will reflect the current state of the hdf5 files metadata&#39;</span><span class="p">)</span>
<span class="c1">#if not os.path.exists(os.path.join(review_yaml_file_path_tail+&quot;-review_status&quot;+&quot;.txt&quot;)):</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">review_status_yaml_file_path</span><span class="p">,</span><span class="s1">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s1">&#39;under review&#39;</span><span class="p">)</span>
<span class="c1"># Stage untracked review files and commit them to local repository</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">git_ops</span><span class="o">.</span><span class="n">get_status</span><span class="p">()</span>
<span class="n">untracked_files</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">status</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
<span class="c1">#tmp = line.decode(&quot;utf-8&quot;)</span>
<span class="c1">#modified_files.append(tmp.split()[1]) </span>
<span class="k">if</span> <span class="s1">&#39;review/&#39;</span> <span class="ow">in</span> <span class="n">line</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="s1">&#39;modified&#39;</span> <span class="ow">in</span> <span class="n">line</span><span class="p">:</span> <span class="c1"># untracked files</span>
<span class="n">untracked_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">untracked_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">1</span><span class="p">])</span>
<span class="k">if</span> <span class="s1">&#39;output_files/&#39;</span><span class="o">+</span><span class="n">filename</span><span class="o">+</span><span class="n">YAML_EXT</span> <span class="ow">in</span> <span class="n">line</span> <span class="ow">and</span> <span class="ow">not</span> <span class="s1">&#39;modified&#39;</span> <span class="ow">in</span> <span class="n">line</span><span class="p">:</span>
<span class="n">untracked_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
<span class="k">if</span> <span class="n">untracked_files</span><span class="p">:</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">git_ops</span><span class="o">.</span><span class="n">add_files_to_git</span><span class="p">(</span><span class="n">untracked_files</span><span class="p">),</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">message</span> <span class="o">=</span> <span class="s1">&#39;Initialized metadata review.&#39;</span>
<span class="n">commit_output</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">git_ops</span><span class="o">.</span><span class="n">commit_changes</span><span class="p">(</span><span class="n">message</span><span class="p">),</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">commit_output</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">))</span>
<span class="c1">#else:</span>
<span class="c1"># print(&#39;This action will not have any effect because metadata review process has been already initialized.&#39;)</span>
<span class="c1">#status_dict = repo_obj.status()</span>
<span class="c1">#for filepath, file_status in status_dict.items():</span>
<span class="c1"># Identify keys associated to review files and stage them</span>
<span class="c1"># if &#39;review/&#39;+filename in filepath:</span>
<span class="c1"># Stage changes</span>
<span class="c1"># repo_obj.index.add(filepath)</span>
<span class="c1">#author = config_file.author #default_signature</span>
<span class="c1">#committer = config_file.committer</span>
<span class="c1">#message = &quot;Initialized metadata review process.&quot;</span>
<span class="c1">#tree = repo_obj.index.write_tree()</span>
<span class="c1">#oid = repo_obj.create_commit(&#39;HEAD&#39;, author, committer, message, tree, [repo_obj.head.peel().oid])</span>
<span class="c1">#print(&quot;Add and commit&quot;+&quot;\n&quot;) </span>
<span class="k">return</span> <span class="n">review_yaml_file_path</span><span class="p">,</span> <span class="n">review_status_yaml_file_path</span></div>
<div class="viewcode-block" id="second_save_metadata_review">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.second_save_metadata_review">[docs]</a>
<span class="k">def</span> <span class="nf">second_save_metadata_review</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">,</span> <span class="n">reviewer_attrs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Second: Once you&#39;re done reviewing the yaml representation of hdf5 file in review folder. </span>
<span class="sd"> Change the review status to complete and save (add and commit) modified .yaml and .txt files in the project by</span>
<span class="sd"> running this function. </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># 1. verify review initialization was performed first</span>
<span class="c1"># 2. change review status in txt to complete</span>
<span class="c1"># 3. git add review/ and git commit -m &quot;Submitted metadata review&quot;</span>
<span class="n">initials</span> <span class="o">=</span> <span class="n">reviewer_attrs</span><span class="p">[</span><span class="s1">&#39;initials&#39;</span><span class="p">]</span>
<span class="c1">#branch_name = &#39;-&#39;.join([reviewer_attrs[&#39;type&#39;],&#39;review&#39;,&#39;by&#39;,initials])</span>
<span class="n">branch_name</span> <span class="o">=</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">&#39;review&#39;</span><span class="p">,</span><span class="n">initials</span><span class="p">])</span>
<span class="c1"># TODO: replace with subprocess + git</span>
<span class="c1">#checkout_review_branch(repo_obj, branch_name)</span>
<span class="c1"># Check you are working at the right branch </span>
<span class="n">curr_branch</span> <span class="o">=</span> <span class="n">git_ops</span><span class="o">.</span><span class="n">show_current_branch</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">branch_name</span> <span class="ow">in</span> <span class="n">curr_branch</span><span class="o">.</span><span class="n">stdout</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;Please checkout &#39;</span> <span class="o">+</span> <span class="n">branch_name</span> <span class="o">+</span> <span class="s1">&#39; via Git Bash before submitting metadata review files. &#39;</span><span class="p">)</span>
<span class="c1"># Collect modified review files</span>
<span class="n">status</span> <span class="o">=</span> <span class="n">git_ops</span><span class="o">.</span><span class="n">get_status</span><span class="p">()</span>
<span class="n">modified_files</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">basename</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">)</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">status</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
<span class="c1"># convert line from bytes to str</span>
<span class="n">tmp</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s1">&#39;modified&#39;</span> <span class="ow">in</span> <span class="n">tmp</span> <span class="ow">and</span> <span class="s1">&#39;review/&#39;</span> <span class="ow">in</span> <span class="n">tmp</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">basename</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">)</span> <span class="ow">in</span> <span class="n">tmp</span><span class="p">:</span>
<span class="n">modified_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tmp</span><span class="o">.</span><span class="n">split</span><span class="p">()[</span><span class="mi">1</span><span class="p">])</span>
<span class="c1"># Stage modified files and commit them to local repository </span>
<span class="n">review_yaml_file_path_tail</span><span class="p">,</span> <span class="n">review_yaml_file_path_head</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">review_yaml_file_path</span><span class="p">)</span>
<span class="n">filename</span><span class="p">,</span> <span class="n">ext</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">review_yaml_file_path_head</span><span class="p">)</span>
<span class="k">if</span> <span class="n">modified_files</span><span class="p">:</span>
<span class="n">review_status_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="s2">&quot;review/&quot;</span><span class="p">,</span><span class="n">filename</span><span class="o">+</span><span class="s2">&quot;-review_status&quot;</span><span class="o">+</span><span class="n">TXT_EXT</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">review_status_file_path</span><span class="p">,</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">submitted&#39;</span><span class="p">)</span>
<span class="n">modified_files</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">review_status_file_path</span><span class="p">)</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">git_ops</span><span class="o">.</span><span class="n">add_files_to_git</span><span class="p">(</span><span class="n">modified_files</span><span class="p">),</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">message</span> <span class="o">=</span> <span class="s1">&#39;Submitted metadata review.&#39;</span>
<span class="n">commit_output</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">git_ops</span><span class="o">.</span><span class="n">commit_changes</span><span class="p">(</span><span class="n">message</span><span class="p">),</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">commit_output</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="n">line</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Nothing to commit.&#39;</span><span class="p">)</span></div>
<span class="c1">#</span>
<div class="viewcode-block" id="load_yaml">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.load_yaml">[docs]</a>
<span class="k">def</span> <span class="nf">load_yaml</span><span class="p">(</span><span class="n">yaml_review_file</span><span class="p">):</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">yaml_review_file</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">stream</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="n">yaml</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">Loader</span><span class="o">=</span><span class="n">yaml</span><span class="o">.</span><span class="n">FullLoader</span><span class="p">)</span>
<span class="k">except</span> <span class="n">yaml</span><span class="o">.</span><span class="n">YAMLError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="n">exc</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span></div>
<div class="viewcode-block" id="update_hdf5_attributes">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.update_hdf5_attributes">[docs]</a>
<span class="k">def</span> <span class="nf">update_hdf5_attributes</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">):</span>
<span class="k">def</span> <span class="nf">update_attributes</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span> <span class="n">yaml_obj</span><span class="p">):</span>
<span class="k">for</span> <span class="n">attr_name</span><span class="p">,</span> <span class="n">attr_value</span> <span class="ow">in</span> <span class="n">yaml_obj</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">attr_value</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">attr_value</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;rename_as&#39;</span><span class="p">:</span> <span class="n">attr_name</span><span class="p">,</span> <span class="s1">&#39;value&#39;</span><span class="p">:</span> <span class="n">attr_value</span><span class="p">}</span>
<span class="k">if</span> <span class="p">(</span><span class="n">attr_name</span> <span class="ow">in</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span> <span class="c1"># delete or update</span>
<span class="k">if</span> <span class="n">attr_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;delete&#39;</span><span class="p">):</span> <span class="c1"># delete when True</span>
<span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">attr_name</span><span class="p">)</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="p">(</span><span class="n">attr_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;rename_as&#39;</span><span class="p">)</span> <span class="o">==</span> <span class="n">attr_name</span><span class="p">):</span> <span class="c1"># update when true</span>
<span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">attr_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;rename_as&#39;</span><span class="p">)]</span> <span class="o">=</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="p">[</span><span class="n">attr_name</span><span class="p">]</span> <span class="c1"># parse_attribute(attr_value) </span>
<span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="fm">__delitem__</span><span class="p">(</span><span class="n">attr_name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span> <span class="c1"># add a new attribute</span>
<span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="n">attr_name</span> <span class="p">:</span> <span class="n">utils</span><span class="o">.</span><span class="n">parse_attribute</span><span class="p">(</span><span class="n">attr_value</span><span class="p">)})</span>
<span class="k">with</span> <span class="n">h5py</span><span class="o">.</span><span class="n">File</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="s1">&#39;r+&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">yaml_dict</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">hdf5_obj</span> <span class="o">=</span> <span class="n">f</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="n">yaml_obj</span> <span class="o">=</span> <span class="n">yaml_dict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="n">update_attributes</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span> <span class="n">yaml_obj</span><span class="p">)</span></div>
<div class="viewcode-block" id="update_hdf5_file_with_review">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.update_hdf5_file_with_review">[docs]</a>
<span class="k">def</span> <span class="nf">update_hdf5_file_with_review</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_review_file</span><span class="p">):</span>
<span class="n">yaml_dict</span> <span class="o">=</span> <span class="n">load_yaml</span><span class="p">(</span><span class="n">yaml_review_file</span><span class="p">)</span>
<span class="n">update_hdf5_attributes</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_dict</span><span class="p">)</span>
<span class="c1"># Regenerate yaml snapshot of updated HDF5 file</span>
<span class="n">output_yml_filename_path</span> <span class="o">=</span> <span class="n">hdf5_vis</span><span class="o">.</span><span class="n">take_yml_snapshot_of_hdf5_file</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">output_yml_filename_path</span><span class="si">}</span><span class="s1"> was successfully regenerated from the updated version of</span><span class="si">{</span><span class="n">input_hdf5_file</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="third_update_hdf5_file_with_review">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.third_update_hdf5_file_with_review">[docs]</a>
<span class="k">def</span> <span class="nf">third_update_hdf5_file_with_review</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_review_file</span><span class="p">,</span> <span class="n">reviewer_attrs</span><span class="o">=</span><span class="p">{},</span> <span class="n">hdf5_upload</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="k">if</span> <span class="s1">&#39;submitted&#39;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">get_review_status</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;Review yaml file must be submitted before trying to perform an update. Run first second_submit_metadata_review().&#39;</span><span class="p">)</span>
<span class="n">update_hdf5_file_with_review</span><span class="p">(</span><span class="n">input_hdf5_file</span><span class="p">,</span> <span class="n">yaml_review_file</span><span class="p">)</span>
<span class="n">git_ops</span><span class="o">.</span><span class="n">perform_git_operations</span><span class="p">(</span><span class="n">hdf5_upload</span><span class="p">)</span></div>
<div class="viewcode-block" id="count">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.count">[docs]</a>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span><span class="n">yml_dict</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="p">,</span><span class="n">h5py</span><span class="o">.</span><span class="n">Group</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;/&#39;</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="mi">4</span><span class="p">:</span>
<span class="n">obj_review</span> <span class="o">=</span> <span class="n">yml_dict</span><span class="p">[</span><span class="n">hdf5_obj</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="n">additions</span> <span class="o">=</span> <span class="p">[</span><span class="ow">not</span> <span class="p">(</span><span class="n">item</span> <span class="ow">in</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">obj_review</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">()]</span>
<span class="n">count_additions</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">additions</span><span class="p">)</span>
<span class="n">deletions</span> <span class="o">=</span> <span class="p">[</span><span class="ow">not</span> <span class="p">(</span><span class="n">item</span> <span class="ow">in</span> <span class="n">obj_review</span><span class="p">[</span><span class="s1">&#39;attributes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">hdf5_obj</span><span class="o">.</span><span class="n">attrs</span><span class="o">.</span><span class="n">keys</span><span class="p">()]</span>
<span class="n">count_delections</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">deletions</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;additions&#39;</span><span class="p">,</span><span class="n">count_additions</span><span class="p">,</span> <span class="s1">&#39;deletions&#39;</span><span class="p">,</span> <span class="n">count_delections</span><span class="p">)</span></div>
<div class="viewcode-block" id="last_submit_metadata_review">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.last_submit_metadata_review">[docs]</a>
<span class="k">def</span> <span class="nf">last_submit_metadata_review</span><span class="p">(</span><span class="n">reviewer_attrs</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Fourth: &quot;&quot;&quot;</span>
<span class="n">initials</span> <span class="o">=</span><span class="n">reviewer_attrs</span><span class="p">[</span><span class="s1">&#39;initials&#39;</span><span class="p">]</span>
<span class="n">repository</span> <span class="o">=</span> <span class="s1">&#39;origin&#39;</span>
<span class="n">branch_name</span> <span class="o">=</span> <span class="s1">&#39;_&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">&#39;review&#39;</span><span class="p">,</span><span class="n">initials</span><span class="p">])</span>
<span class="n">push_command</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">repository</span><span class="p">,</span><span class="n">refspec</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;git&#39;</span><span class="p">,</span><span class="s1">&#39;push&#39;</span><span class="p">,</span><span class="n">repository</span><span class="p">,</span><span class="n">refspec</span><span class="p">]</span>
<span class="n">list_branches_command</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;git&#39;</span><span class="p">,</span><span class="s1">&#39;branch&#39;</span><span class="p">,</span><span class="s1">&#39;--list&#39;</span><span class="p">]</span>
<span class="n">branches</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">list_branches_command</span><span class="p">,</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">text</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">branch_name</span> <span class="ow">in</span> <span class="n">branches</span><span class="o">.</span><span class="n">stdout</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;There is no branch named &#39;</span><span class="o">+</span><span class="n">branch_name</span><span class="o">+</span><span class="s1">&#39;.</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Make sure to run data owner review workflow from the beginning without missing any steps.&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">curr_branch</span> <span class="o">=</span> <span class="n">git_ops</span><span class="o">.</span><span class="n">show_current_branch</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">branch_name</span> <span class="ow">in</span> <span class="n">curr_branch</span><span class="o">.</span><span class="n">stdout</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Complete metadata review could not be completed.</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Make sure a data-owner workflow has already been started on branch &#39;</span><span class="o">+</span><span class="n">branch_name</span><span class="o">+</span><span class="s1">&#39;</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;The step &quot;Complete metadata review&quot; will have no effect.&#39;</span><span class="p">)</span>
<span class="k">return</span>
<span class="c1"># push</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">push_command</span><span class="p">(</span><span class="n">repository</span><span class="p">,</span><span class="n">branch_name</span><span class="p">),</span><span class="n">capture_output</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">text</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span><span class="n">check</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">stdout</span><span class="p">)</span>
<span class="c1"># 1. git add output_files/</span>
<span class="c1"># 2. delete review/</span>
<span class="c1">#shutil.rmtree(os.path.join(os.path.abspath(os.curdir),&quot;review&quot;))</span>
<span class="c1"># 3. git rm review/</span>
<span class="c1"># 4. git commit -m &quot;Completed review process. Current state of hdf5 file and yml should be up to date.&quot;</span>
<span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">returncode</span></div>
<span class="c1">#import config_file</span>
<span class="c1">#import hdf5_vis</span>
<div class="viewcode-block" id="MetadataHarvester">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester">[docs]</a>
<span class="k">class</span> <span class="nc">MetadataHarvester</span><span class="p">:</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">parent_files</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="k">if</span> <span class="n">parent_files</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">parent_files</span> <span class="o">=</span> <span class="p">[]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">parent_files</span> <span class="o">=</span> <span class="n">parent_files</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;project&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;sample&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;environment&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;instruments&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;datasets&quot;</span><span class="p">:</span> <span class="p">{}</span>
<span class="p">}</span>
<div class="viewcode-block" id="MetadataHarvester.add_project_info">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.add_project_info">[docs]</a>
<span class="k">def</span> <span class="nf">add_project_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_add_info</span><span class="p">(</span><span class="s2">&quot;project&quot;</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">)</span></div>
<div class="viewcode-block" id="MetadataHarvester.add_sample_info">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.add_sample_info">[docs]</a>
<span class="k">def</span> <span class="nf">add_sample_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_add_info</span><span class="p">(</span><span class="s2">&quot;sample&quot;</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">)</span></div>
<div class="viewcode-block" id="MetadataHarvester.add_environment_info">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.add_environment_info">[docs]</a>
<span class="k">def</span> <span class="nf">add_environment_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_add_info</span><span class="p">(</span><span class="s2">&quot;environment&quot;</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">)</span></div>
<div class="viewcode-block" id="MetadataHarvester.add_instrument_info">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.add_instrument_info">[docs]</a>
<span class="k">def</span> <span class="nf">add_instrument_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_add_info</span><span class="p">(</span><span class="s2">&quot;instruments&quot;</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">)</span></div>
<div class="viewcode-block" id="MetadataHarvester.add_dataset_info">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.add_dataset_info">[docs]</a>
<span class="k">def</span> <span class="nf">add_dataset_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">append</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_add_info</span><span class="p">(</span><span class="s2">&quot;datasets&quot;</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_add_info</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">category</span><span class="p">,</span> <span class="n">key_or_dict</span><span class="p">,</span> <span class="n">value</span><span class="p">,</span> <span class="n">append</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Internal helper method to add information to a category.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key_or_dict</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">]</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">key_or_dict</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">key_or_dict</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">]:</span>
<span class="k">if</span> <span class="n">append</span><span class="p">:</span>
<span class="n">current_value</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">current_value</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="c1"># Append the new value to the list</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span> <span class="o">=</span> <span class="n">current_value</span> <span class="o">+</span> <span class="n">value</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">current_value</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="c1"># Append the new value as a comma-separated string</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span> <span class="o">=</span> <span class="n">current_value</span> <span class="o">+</span> <span class="s1">&#39;,&#39;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Handle other types (for completeness, usually not required)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">current_value</span><span class="p">,</span> <span class="n">value</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">category</span><span class="p">][</span><span class="n">key_or_dict</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span>
<div class="viewcode-block" id="MetadataHarvester.get_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.get_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">get_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="p">{</span>
<span class="s2">&quot;parent_files&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">parent_files</span><span class="p">,</span>
<span class="s2">&quot;metadata&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span>
<span class="p">}</span></div>
<div class="viewcode-block" id="MetadataHarvester.print_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.print_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">print_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;parent_files&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">parent_files</span><span class="p">)</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="n">key</span><span class="p">,</span><span class="s1">&#39;metadata:</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">metadata</span><span class="p">[</span><span class="n">key</span><span class="p">]</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="nb">print</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span><span class="n">item</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span></div>
<div class="viewcode-block" id="MetadataHarvester.clear_metadata">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.MetadataHarvester.clear_metadata">[docs]</a>
<span class="k">def</span> <span class="nf">clear_metadata</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;project&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;sample&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;environment&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;instruments&quot;</span><span class="p">:</span> <span class="p">{},</span>
<span class="s2">&quot;datasets&quot;</span><span class="p">:</span> <span class="p">{}</span>
<span class="p">}</span>
<span class="bp">self</span><span class="o">.</span><span class="n">parent_files</span> <span class="o">=</span> <span class="p">[]</span></div>
</div>
<div class="viewcode-block" id="main">
<a class="viewcode-back" href="../../modules/src.html#src.metadata_review_lib.main">[docs]</a>
<span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
<span class="n">output_filename_path</span> <span class="o">=</span> <span class="s2">&quot;output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5&quot;</span>
<span class="n">output_yml_filename_path</span> <span class="o">=</span> <span class="s2">&quot;output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.yalm&quot;</span>
<span class="n">output_yml_filename_path_tail</span><span class="p">,</span> <span class="n">filename</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">output_yml_filename_path</span><span class="p">)</span></div>
<span class="c1">#output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename_path)</span>
<span class="c1">#first_initialize_metadata_review(output_filename_path,initials=&#39;NG&#39;)</span>
<span class="c1">#second_submit_metadata_review()</span>
<span class="c1">#if os.path.exists(os.path.join(os.path.join(os.path.abspath(os.curdir),&quot;review&quot;),filename)):</span>
<span class="c1"># third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.join(os.path.abspath(os.curdir),&quot;review&quot;),filename))</span>
<span class="c1">#fourth_complete_metadata_review() </span>
<span class="c1">#if __name__ == &#39;__main__&#39;:</span>
<span class="c1"># main()</span>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,565 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>utils.g5505_utils &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../../_static/jquery.js?v=5d32c60e"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../../_static/documentation_options.js?v=8d563738"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item active">utils.g5505_utils</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for utils.g5505_utils</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">shutil</span>
<span class="kn">import</span> <span class="nn">datetime</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">h5py</span>
<span class="kn">import</span> <span class="nn">re</span>
<div class="viewcode-block" id="setup_logging">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.setup_logging">[docs]</a>
<span class="k">def</span> <span class="nf">setup_logging</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="n">log_filename</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Sets up logging to a specified directory and file.</span>
<span class="sd"> Parameters:</span>
<span class="sd"> log_dir (str): Directory to save the log file.</span>
<span class="sd"> log_filename (str): Name of the log file.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Ensure the log directory exists</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># Create a logger instance</span>
<span class="n">logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">()</span>
<span class="n">logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">INFO</span><span class="p">)</span>
<span class="c1"># Create a file handler</span>
<span class="n">log_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="n">log_filename</span><span class="p">)</span>
<span class="n">file_handler</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">FileHandler</span><span class="p">(</span><span class="n">log_path</span><span class="p">)</span>
<span class="c1"># Create a formatter and set it for the handler</span>
<span class="n">formatter</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">Formatter</span><span class="p">(</span><span class="s1">&#39;</span><span class="si">%(asctime)s</span><span class="s1"> - </span><span class="si">%(name)s</span><span class="s1"> - </span><span class="si">%(levelname)s</span><span class="s1"> - </span><span class="si">%(message)s</span><span class="s1">&#39;</span><span class="p">)</span>
<span class="n">file_handler</span><span class="o">.</span><span class="n">setFormatter</span><span class="p">(</span><span class="n">formatter</span><span class="p">)</span>
<span class="c1"># Add the handler to the logger</span>
<span class="n">logger</span><span class="o">.</span><span class="n">addHandler</span><span class="p">(</span><span class="n">file_handler</span><span class="p">)</span></div>
<div class="viewcode-block" id="is_callable_list">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.is_callable_list">[docs]</a>
<span class="k">def</span> <span class="nf">is_callable_list</span><span class="p">(</span><span class="n">x</span> <span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">all</span><span class="p">([</span><span class="nb">callable</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">x</span><span class="p">])</span></div>
<div class="viewcode-block" id="is_str_list">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.is_str_list">[docs]</a>
<span class="k">def</span> <span class="nf">is_str_list</span><span class="p">(</span><span class="n">x</span> <span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">all</span><span class="p">([</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">item</span><span class="p">,</span><span class="nb">str</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">x</span><span class="p">])</span></div>
<div class="viewcode-block" id="augment_with_filetype">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.augment_with_filetype">[docs]</a>
<span class="k">def</span> <span class="nf">augment_with_filetype</span><span class="p">(</span><span class="n">df</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="s1">&#39;filetype&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">item</span><span class="p">)[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">::]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;filename&#39;</span><span class="p">]]</span>
<span class="c1">#return [os.path.splitext(item)[1][1::] for item in df[&#39;filename&#39;]]</span>
<span class="k">return</span> <span class="n">df</span></div>
<div class="viewcode-block" id="augment_with_filenumber">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.augment_with_filenumber">[docs]</a>
<span class="k">def</span> <span class="nf">augment_with_filenumber</span><span class="p">(</span><span class="n">df</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="s1">&#39;filenumber&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;_&#39;</span><span class="p">)]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;filename&#39;</span><span class="p">]]</span>
<span class="c1">#return [item[0:item.find(&#39;_&#39;)] for item in df[&#39;filename&#39;]]</span>
<span class="k">return</span> <span class="n">df</span></div>
<div class="viewcode-block" id="group_by_df_column">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.group_by_df_column">[docs]</a>
<span class="k">def</span> <span class="nf">group_by_df_column</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">column_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> df (pandas.DataFrame): </span>
<span class="sd"> column_name (str): column_name of df by which grouping operation will take place. </span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;column_name must be in the columns of df.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">df</span><span class="p">[</span><span class="n">column_name</span><span class="p">]</span></div>
<div class="viewcode-block" id="split_sample_col_into_sample_and_data_quality_cols">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols">[docs]</a>
<span class="k">def</span> <span class="nf">split_sample_col_into_sample_and_data_quality_cols</span><span class="p">(</span><span class="n">input_data</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span>
<span class="n">sample_name</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">sample_quality</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">input_data</span><span class="p">[</span><span class="s1">&#39;sample&#39;</span><span class="p">]:</span>
<span class="k">if</span> <span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;(&#39;</span><span class="p">)</span><span class="o">!=-</span><span class="mi">1</span><span class="p">:</span>
<span class="c1">#print(item)</span>
<span class="n">sample_name</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;(&#39;</span><span class="p">)])</span>
<span class="n">sample_quality</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">&#39;(&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">item</span><span class="o">==</span><span class="s1">&#39;&#39;</span><span class="p">:</span>
<span class="n">sample_name</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;Not yet annotated&#39;</span><span class="p">)</span>
<span class="n">sample_quality</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;unevaluated&#39;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sample_name</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">sample_quality</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;good data&#39;</span><span class="p">)</span>
<span class="n">input_data</span><span class="p">[</span><span class="s1">&#39;sample&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">sample_name</span>
<span class="n">input_data</span><span class="p">[</span><span class="s1">&#39;data_quality&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">sample_quality</span>
<span class="k">return</span> <span class="n">input_data</span></div>
<div class="viewcode-block" id="make_file_copy">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.make_file_copy">[docs]</a>
<span class="k">def</span> <span class="nf">make_file_copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">output_folder_name</span> <span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s1">&#39;tmp_files&#39;</span><span class="p">):</span>
<span class="n">pathtail</span><span class="p">,</span> <span class="n">filename</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">)</span>
<span class="c1">#backup_filename = &#39;backup_&#39;+ filename</span>
<span class="n">backup_filename</span> <span class="o">=</span> <span class="n">filename</span>
<span class="c1"># Path </span>
<span class="n">ROOT_DIR</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">abspath</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">curdir</span><span class="p">)</span>
<span class="n">tmp_dirpath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ROOT_DIR</span><span class="p">,</span><span class="n">output_folder_name</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">tmp_dirpath</span><span class="p">):</span>
<span class="n">os</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">tmp_dirpath</span><span class="p">)</span>
<span class="n">tmp_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">tmp_dirpath</span><span class="p">,</span><span class="n">backup_filename</span><span class="p">)</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">source_file_path</span><span class="p">,</span> <span class="n">tmp_file_path</span><span class="p">)</span>
<span class="k">return</span> <span class="n">tmp_file_path</span></div>
<div class="viewcode-block" id="created_at">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.created_at">[docs]</a>
<span class="k">def</span> <span class="nf">created_at</span><span class="p">(</span><span class="n">datetime_format</span> <span class="o">=</span> <span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H:%M:%S&#39;</span><span class="p">):</span>
<span class="n">now</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
<span class="c1"># Populate now object with time zone information obtained from the local system</span>
<span class="n">now_tz_aware</span> <span class="o">=</span> <span class="n">now</span><span class="o">.</span><span class="n">astimezone</span><span class="p">()</span>
<span class="n">tz</span> <span class="o">=</span> <span class="n">now_tz_aware</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%z&#39;</span><span class="p">)</span>
<span class="c1"># Format the timestamp using datetime_format; colons are not replaced here and the UTC-offset suffix below is disabled</span>
<span class="n">created_at</span> <span class="o">=</span> <span class="n">now_tz_aware</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">datetime_format</span><span class="p">)</span> <span class="c1">#+ &#39;_UTC-OFST_&#39; + tz</span>
<span class="k">return</span> <span class="n">created_at</span></div>
<div class="viewcode-block" id="sanitize_dataframe">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.sanitize_dataframe">[docs]</a>
<span class="k">def</span> <span class="nf">sanitize_dataframe</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">:</span>
<span class="c1"># Handle datetime columns (convert to string in &#39;yyyy-mm-dd hh-mm-ss&#39; format)</span>
<span class="n">datetime_cols</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;datetime&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">columns</span>
<span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">datetime_cols</span><span class="p">:</span>
<span class="c1"># Convert datetime to string in the specified format, handling NaT</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s1">&#39;%Y-%m-</span><span class="si">%d</span><span class="s1"> %H-%M-%S&#39;</span><span class="p">)</span>
<span class="c1"># Handle object columns with mixed types</span>
<span class="n">otype_cols</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">&#39;O&#39;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">otype_cols</span><span class="p">:</span>
<span class="n">col_data</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span>
<span class="c1"># Check if all elements in the column are strings</span>
<span class="k">if</span> <span class="n">col_data</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="nb">str</span><span class="p">))</span><span class="o">.</span><span class="n">all</span><span class="p">():</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># If the column contains mixed types, attempt to convert to numeric, coercing errors to NaN</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_numeric</span><span class="p">(</span><span class="n">col_data</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s1">&#39;coerce&#39;</span><span class="p">)</span>
<span class="c1"># Handle NaN values differently based on dtype</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_string_dtype</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]):</span>
<span class="c1"># Replace NaN in string columns with empty string</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="s1">&#39;&#39;</span><span class="p">)</span> <span class="c1"># Replace NaN with empty string</span>
<span class="k">elif</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_numeric_dtype</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]):</span>
<span class="c1"># For numeric columns, we want to keep NaN as it is</span>
<span class="c1"># But if integer column has NaN, consider casting to float</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_integer_dtype</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]):</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span> <span class="c1"># Cast to float to allow NaN</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span> <span class="c1"># Keep NaN in float columns</span>
<span class="k">return</span> <span class="n">df</span></div>
<div class="viewcode-block" id="convert_dataframe_to_np_structured_array">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.convert_dataframe_to_np_structured_array">[docs]</a>
<span class="k">def</span> <span class="nf">convert_dataframe_to_np_structured_array</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">):</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sanitize_dataframe</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="c1"># Define the dtype for the structured array, ensuring compatibility with h5py</span>
<span class="n">dtype</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
<span class="n">col_data</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span>
<span class="n">col_dtype</span> <span class="o">=</span> <span class="n">col_data</span><span class="o">.</span><span class="n">dtype</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_string_dtype</span><span class="p">(</span><span class="n">col_dtype</span><span class="p">):</span>
<span class="c1"># Convert string dtype to fixed-length strings</span>
<span class="n">max_len</span> <span class="o">=</span> <span class="n">col_data</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">len</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="k">if</span> <span class="ow">not</span> <span class="n">col_data</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">all</span><span class="p">()</span> <span class="k">else</span> <span class="mi">0</span>
<span class="n">dtype</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">col</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;S</span><span class="si">{</span><span class="n">max_len</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">))</span>
<span class="k">elif</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_integer_dtype</span><span class="p">(</span><span class="n">col_dtype</span><span class="p">):</span>
<span class="n">dtype</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">col</span><span class="p">,</span> <span class="s1">&#39;i4&#39;</span><span class="p">))</span> <span class="c1"># Assuming 32-bit integer</span>
<span class="k">elif</span> <span class="n">pd</span><span class="o">.</span><span class="n">api</span><span class="o">.</span><span class="n">types</span><span class="o">.</span><span class="n">is_float_dtype</span><span class="p">(</span><span class="n">col_dtype</span><span class="p">):</span>
<span class="n">dtype</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">col</span><span class="p">,</span> <span class="s1">&#39;f4&#39;</span><span class="p">))</span> <span class="c1"># Assuming 32-bit float</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Handle unsupported data types</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported dtype found in column &#39;</span><span class="si">{</span><span class="n">col</span><span class="si">}</span><span class="s2">&#39;: </span><span class="si">{</span><span class="n">col_data</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Unsupported data type: </span><span class="si">{</span><span class="n">col_data</span><span class="o">.</span><span class="n">dtype</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="c1"># Log more detailed error message</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Error processing column &#39;</span><span class="si">{</span><span class="n">col</span><span class="si">}</span><span class="s2">&#39;: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">raise</span>
<span class="c1"># Convert the DataFrame to a structured array</span>
<span class="n">structured_array</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">itertuples</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">)),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">return</span> <span class="n">structured_array</span></div>
<div class="viewcode-block" id="convert_string_to_bytes">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.convert_string_to_bytes">[docs]</a>
<span class="k">def</span> <span class="nf">convert_string_to_bytes</span><span class="p">(</span><span class="n">input_list</span><span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Convert a list of strings into a numpy array with utf8-type entries.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> input_list (list) : list of string objects</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> input_array_bytes (ndarray): array of utf8-type entries.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">utf8_type</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">max_length</span><span class="p">:</span> <span class="n">h5py</span><span class="o">.</span><span class="n">string_dtype</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">,</span> <span class="n">max_length</span><span class="p">)</span>
<span class="k">if</span> <span class="n">input_list</span><span class="p">:</span>
<span class="n">max_length</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">input_list</span><span class="p">)</span>
<span class="c1"># Convert the strings to bytes with utf-8 encoding, specifying errors=&#39;ignore&#39; to skip characters that cannot be encoded</span>
<span class="n">input_list_bytes</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s1">&#39;ignore&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">input_list</span><span class="p">]</span>
<span class="n">input_array_bytes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">input_list_bytes</span><span class="p">,</span><span class="n">dtype</span><span class="o">=</span><span class="n">utf8_type</span><span class="p">(</span><span class="n">max_length</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">input_array_bytes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([],</span><span class="n">dtype</span><span class="o">=</span><span class="n">utf8_type</span><span class="p">(</span><span class="mi">0</span><span class="p">))</span>
<span class="k">return</span> <span class="n">input_array_bytes</span></div>
<div class="viewcode-block" id="convert_attrdict_to_np_structured_array">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.convert_attrdict_to_np_structured_array">[docs]</a>
<span class="k">def</span> <span class="nf">convert_attrdict_to_np_structured_array</span><span class="p">(</span><span class="n">attr_value</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a dictionary of attributes into a numpy structured array for HDF5 </span>
<span class="sd"> compound type compatibility.</span>
<span class="sd"> Each dictionary key is mapped to a field in the structured array, with the </span>
<span class="sd"> data type (S) determined by the longest string representation of the values. </span>
<span class="sd"> If the dictionary is empty, the function returns &#39;missing&#39;.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> attr_value : dict</span>
<span class="sd"> Dictionary containing the attributes to be converted. Example:</span>
<span class="sd"> attr_value = {</span>
<span class="sd"> &#39;name&#39;: &#39;Temperature&#39;,</span>
<span class="sd"> &#39;unit&#39;: &#39;Celsius&#39;,</span>
<span class="sd"> &#39;value&#39;: 23.5,</span>
<span class="sd"> &#39;timestamp&#39;: &#39;2023-09-26 10:00&#39;</span>
<span class="sd"> }</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> new_attr_value : ndarray or str</span>
<span class="sd"> Numpy structured array with UTF-8 encoded fields. Returns &#39;missing&#39; if </span>
<span class="sd"> the input dictionary is empty.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dtype</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">values_list</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">max_length</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">attr_value</span><span class="p">[</span><span class="n">key</span><span class="p">]))</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">attr_value</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">attr_value</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">if</span> <span class="n">key</span> <span class="o">!=</span> <span class="s1">&#39;rename_as&#39;</span><span class="p">:</span>
<span class="n">dtype</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">key</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;S</span><span class="si">{</span><span class="n">max_length</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">))</span>
<span class="n">values_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">attr_value</span><span class="p">[</span><span class="n">key</span><span class="p">])</span>
<span class="k">if</span> <span class="n">values_list</span><span class="p">:</span>
<span class="n">new_attr_value</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="nb">tuple</span><span class="p">(</span><span class="n">values_list</span><span class="p">)],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">new_attr_value</span> <span class="o">=</span> <span class="s1">&#39;missing&#39;</span>
<span class="k">return</span> <span class="n">new_attr_value</span></div>
<div class="viewcode-block" id="infer_units">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.infer_units">[docs]</a>
<span class="k">def</span> <span class="nf">infer_units</span><span class="p">(</span><span class="n">column_name</span><span class="p">):</span>
<span class="c1"># TODO: complete or remove</span>
<span class="n">match</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">&#39;\[.+\]&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">match</span><span class="p">:</span>
<span class="k">return</span> <span class="n">match</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">match</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="s1">&#39;\(.+\)&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">match</span></div>
<div class="viewcode-block" id="progressBar">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.progressBar">[docs]</a>
<span class="k">def</span> <span class="nf">progressBar</span><span class="p">(</span><span class="n">count_value</span><span class="p">,</span> <span class="n">total</span><span class="p">,</span> <span class="n">suffix</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">):</span>
<span class="n">bar_length</span> <span class="o">=</span> <span class="mi">100</span>
<span class="n">filled_up_Length</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">bar_length</span><span class="o">*</span> <span class="n">count_value</span> <span class="o">/</span> <span class="nb">float</span><span class="p">(</span><span class="n">total</span><span class="p">)))</span>
<span class="n">percentage</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="mf">100.0</span> <span class="o">*</span> <span class="n">count_value</span><span class="o">/</span><span class="nb">float</span><span class="p">(</span><span class="n">total</span><span class="p">),</span><span class="mi">1</span><span class="p">)</span>
<span class="n">bar</span> <span class="o">=</span> <span class="s1">&#39;=&#39;</span> <span class="o">*</span> <span class="n">filled_up_Length</span> <span class="o">+</span> <span class="s1">&#39;-&#39;</span> <span class="o">*</span> <span class="p">(</span><span class="n">bar_length</span> <span class="o">-</span> <span class="n">filled_up_Length</span><span class="p">)</span>
<span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s1">&#39;[</span><span class="si">%s</span><span class="s1">] </span><span class="si">%s%s</span><span class="s1"> ...</span><span class="si">%s</span><span class="se">\r</span><span class="s1">&#39;</span> <span class="o">%</span><span class="p">(</span><span class="n">bar</span><span class="p">,</span> <span class="n">percentage</span><span class="p">,</span> <span class="s1">&#39;%&#39;</span><span class="p">,</span> <span class="n">suffix</span><span class="p">))</span>
<span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">flush</span><span class="p">()</span></div>
<div class="viewcode-block" id="copy_directory_with_contraints">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.copy_directory_with_contraints">[docs]</a>
<span class="k">def</span> <span class="nf">copy_directory_with_contraints</span><span class="p">(</span><span class="n">input_dir_path</span><span class="p">,</span> <span class="n">output_dir_path</span><span class="p">,</span>
<span class="n">select_dir_keywords</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">select_file_keywords</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">allowed_file_extensions</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">dry_run</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Copies files from input_dir_path to output_dir_path based on specified constraints.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> input_dir_path (str): Path to the input directory.</span>
<span class="sd"> output_dir_path (str): Path to the output directory.</span>
<span class="sd"> select_dir_keywords (list): optional, List of keywords for selecting directories.</span>
<span class="sd"> select_file_keywords (list): optional, List of keywords for selecting files.</span>
<span class="sd"> allowed_file_extensions (list): optional, List of allowed file extensions.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> path_to_files_dict (dict): dictionary mapping directory paths to lists of copied file names satisfying the constraints.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Unconstrained default behavior: No filters, make sure variables are lists even when defined as None in function signature</span>
<span class="n">select_dir_keywords</span> <span class="o">=</span> <span class="n">select_dir_keywords</span> <span class="ow">or</span> <span class="p">[]</span>
<span class="n">select_file_keywords</span> <span class="o">=</span> <span class="n">select_file_keywords</span> <span class="ow">or</span> <span class="p">[]</span>
<span class="n">allowed_file_extensions</span> <span class="o">=</span> <span class="n">allowed_file_extensions</span> <span class="ow">or</span> <span class="p">[]</span>
<span class="n">date</span> <span class="o">=</span> <span class="n">created_at</span><span class="p">(</span><span class="s1">&#39;%Y_%m&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;:&quot;</span><span class="p">,</span> <span class="s2">&quot;-&quot;</span><span class="p">)</span>
<span class="n">log_dir</span><span class="o">=</span><span class="s1">&#39;logs/&#39;</span>
<span class="n">setup_logging</span><span class="p">(</span><span class="n">log_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;copy_directory_with_contraints_</span><span class="si">{</span><span class="n">date</span><span class="si">}</span><span class="s2">.log&quot;</span><span class="p">)</span>
<span class="c1"># Define helper functions. Return by default true when filtering lists are either None or []</span>
<span class="k">def</span> <span class="nf">has_allowed_extension</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="k">return</span> <span class="ow">not</span> <span class="n">allowed_file_extensions</span> <span class="ow">or</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">splitext</span><span class="p">(</span><span class="n">filename</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">in</span> <span class="n">allowed_file_extensions</span>
<span class="k">def</span> <span class="nf">file_is_selected</span><span class="p">(</span><span class="n">filename</span><span class="p">):</span>
<span class="k">return</span> <span class="ow">not</span> <span class="n">select_file_keywords</span> <span class="ow">or</span> <span class="nb">any</span><span class="p">(</span><span class="n">keyword</span> <span class="ow">in</span> <span class="n">filename</span> <span class="k">for</span> <span class="n">keyword</span> <span class="ow">in</span> <span class="n">select_file_keywords</span><span class="p">)</span>
<span class="c1"># Collect paths of directories, which are directly connected to the root dir and match select_dir_keywords</span>
<span class="n">paths</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="n">select_dir_keywords</span><span class="p">:</span>
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">input_dir_path</span><span class="p">):</span> <span class="c1">#Path(input_dir_path).iterdir():</span>
<span class="k">if</span> <span class="nb">any</span><span class="p">([</span><span class="n">item</span> <span class="ow">in</span> <span class="n">keyword</span> <span class="k">for</span> <span class="n">keyword</span> <span class="ow">in</span> <span class="n">select_dir_keywords</span><span class="p">]):</span>
<span class="n">paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">input_dir_path</span><span class="p">,</span><span class="n">item</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">input_dir_path</span><span class="p">)</span> <span class="c1">#paths.append(Path(input_dir_path))</span>
<span class="n">path_to_files_dict</span> <span class="o">=</span> <span class="p">{}</span> <span class="c1"># Dictionary to store directory-file pairs satisfying constraints</span>
<span class="k">for</span> <span class="n">subpath</span> <span class="ow">in</span> <span class="n">paths</span><span class="p">:</span>
<span class="k">for</span> <span class="n">dirpath</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">filenames</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">walk</span><span class="p">(</span><span class="n">subpath</span><span class="p">,</span><span class="n">topdown</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="c1"># Reduce filenames to those that are admissible</span>
<span class="n">admissible_filenames</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">filename</span> <span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">filenames</span>
<span class="k">if</span> <span class="n">file_is_selected</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span> <span class="ow">and</span> <span class="n">has_allowed_extension</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">admissible_filenames</span><span class="p">:</span> <span class="c1"># Only create directory if there are files to copy</span>
<span class="n">relative_dirpath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span> <span class="n">input_dir_path</span><span class="p">)</span>
<span class="n">target_dirpath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">output_dir_path</span><span class="p">,</span> <span class="n">relative_dirpath</span><span class="p">)</span>
<span class="n">path_to_files_dict</span><span class="p">[</span><span class="n">target_dirpath</span><span class="p">]</span> <span class="o">=</span> <span class="n">admissible_filenames</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">dry_run</span><span class="p">:</span>
<span class="c1"># Perform the actual copying</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">target_dirpath</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">admissible_filenames</span><span class="p">:</span>
<span class="n">src_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">dirpath</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="n">dest_file_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">target_dirpath</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">copy2</span><span class="p">(</span><span class="n">src_file_path</span><span class="p">,</span> <span class="n">dest_file_path</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s2">&quot;Failed to copy </span><span class="si">%s</span><span class="s2">: </span><span class="si">%s</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">src_file_path</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
<span class="k">return</span> <span class="n">path_to_files_dict</span> </div>
<div class="viewcode-block" id="to_serializable_dtype">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.to_serializable_dtype">[docs]</a>
<span class="k">def</span> <span class="nf">to_serializable_dtype</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Transform value&#39;s dtype into YAML/JSON compatible dtype</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> value : _type_</span>
<span class="sd"> _description_</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> _type_</span>
<span class="sd"> _description_</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">generic</span><span class="p">):</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">bytes_</span><span class="p">):</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">value</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">unicode_</span><span class="p">):</span>
<span class="n">value</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">):</span>
<span class="n">value</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Yaml-compatible data-type was not found. Value has been set to NaN.&#39;</span><span class="p">)</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="c1"># Handling structured array types (with fields)</span>
<span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">names</span><span class="p">:</span>
<span class="n">value</span> <span class="o">=</span> <span class="p">{</span><span class="n">field</span><span class="p">:</span> <span class="n">to_serializable_dtype</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="n">field</span><span class="p">])</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">names</span><span class="p">}</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Handling regular array NumPy types with assumption of uniform dtype across array elements</span>
<span class="c1"># TODO: evaluate a more general way to check for individual dtypes </span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">bytes</span><span class="p">):</span>
<span class="c1"># Decode bytes</span>
<span class="n">value</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">value</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span>
<span class="c1"># Already a string type</span>
<span class="n">value</span> <span class="o">=</span> <span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">value</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">int</span><span class="p">):</span>
<span class="c1"># Integer type</span>
<span class="n">value</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">value</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="nb">int</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">float</span><span class="p">):</span>
<span class="c1"># Floating type</span>
<span class="n">value</span> <span class="o">=</span> <span class="p">[</span><span class="nb">float</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">value</span><span class="p">]</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Yaml-compatible data-type was not found. Value has been set to NaN.&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Debug: value.dtype is&quot;</span><span class="p">,</span> <span class="n">value</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;Error converting value: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s1">. Value has been set to NaN.&#39;</span><span class="p">)</span>
<span class="n">value</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="k">return</span> <span class="n">value</span></div>
<div class="viewcode-block" id="is_structured_array">
<a class="viewcode-back" href="../../modules/utils.html#utils.g5505_utils.is_structured_array">[docs]</a>
<span class="k">def</span> <span class="nf">is_structured_array</span><span class="p">(</span><span class="n">attr_val</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">attr_val</span><span class="p">,</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
<span class="k">return</span> <span class="kc">True</span> <span class="k">if</span> <span class="n">attr_val</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">names</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">False</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">False</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

26
docs/build/html/_sources/index.rst.txt vendored Normal file
View File

@@ -0,0 +1,26 @@
.. DIMA documentation master file, created by
sphinx-quickstart on Wed Jul 10 15:50:06 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to DIMA's documentation!
================================
.. toctree::
:maxdepth: 2
:caption: Contents:
modules/src
modules/pipelines
modules/utils
modules/notebooks
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

View File

@@ -0,0 +1,7 @@
Notebooks
==========================
.. automodule:: notebooks
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,12 @@
Pipelines and workflows
==========================
.. automodule:: pipelines.data_integration
:members:
:undoc-members:
:show-inheritance:
.. automodule:: pipelines.metadata_revision
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,27 @@
HDF5 Data Operations
==========================
.. automodule:: src.hdf5_ops
:members:
:undoc-members:
:show-inheritance:
HDF5 Writer
==========================
.. automodule:: src.hdf5_writer
:members:
:undoc-members:
:show-inheritance:
Data Visualization
==================
.. automodule:: src.hdf5_vis
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
Data Structure Conversion
=========================
.. automodule:: utils.g5505_utils
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,7 @@
Tutorial workflows
==========================
.. automodule:: workflow_data_integration
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,123 @@
/* Compatibility shim for jQuery and underscores.js.
*
* Copyright Sphinx contributors
* Released under the two clause BSD licence
*/
/**
 * Decode a URL-encoded string.
 *
 * Every "+" is replaced by a space before the value is passed to
 * decodeURIComponent. Falsy input is returned unchanged.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
 */
jQuery.urldecode = function(x) {
  return x ? decodeURIComponent(x.replace(/\+/g, ' ')) : x;
};
/**
* Small helper to urlencode strings: a direct alias of the built-in
* encodeURIComponent, so it accepts the same argument and returns the
* same result.
*/
jQuery.urlencode = encodeURIComponent;
/**
 * Parse the parameters of a URL query string.
 *
 * Multiple values per key are supported; the value part is always an
 * array, in order of appearance.
 *
 * @param {string} [s] Query string to parse. Defaults to
 *     document.location.search. Everything up to and including the first
 *     "?" is ignored.
 * @returns {Object} Map from decoded key to an array of decoded values.
 *     A parameter written without "=" contributes `undefined` as its value.
 */
jQuery.getQueryParameters = function(s) {
  if (typeof s === 'undefined')
    s = document.location.search;
  var parts = s.substr(s.indexOf('?') + 1).split('&');
  var result = {};
  for (var i = 0; i < parts.length; i++) {
    var part = parts[i];
    // Split on the FIRST "=" only. String.prototype.split with a limit
    // discards the remainder, so "a=b=c" used to yield the value "b".
    var eq = part.indexOf('=');
    var key = jQuery.urldecode(eq < 0 ? part : part.substr(0, eq));
    var value = jQuery.urldecode(eq < 0 ? undefined : part.substr(eq + 1));
    // Own-property check: a bare `in` also matches inherited names such as
    // "toString" or "constructor", which made the code call .push on a
    // function and throw.
    if (Object.prototype.hasOwnProperty.call(result, key))
      result[key].push(value);
    else
      result[key] = [value];
  }
  return result;
};
/**
 * Highlight a given string on a jQuery object by wrapping each match in a
 * span element (tspan inside SVG) with the given class name.
 *
 * Only the node text is lower-cased before searching; `text` is used as
 * given. NOTE(review): callers presumably pass it already lower-cased --
 * confirm.
 *
 * Returns whatever this.each() returns, so the call stays chainable.
 */
jQuery.fn.highlightText = function(text, className) {
  var SVG_NS = "http://www.w3.org/2000/svg";
  var pendingRects = [];

  function visit(node, queue) {
    if (node.nodeType !== 3) {
      // Not a text node: descend into children, except for form controls.
      if (!jQuery(node).is("button, select, textarea")) {
        jQuery.each(node.childNodes, function(_, child) {
          visit(child, queue);
        });
      }
      return;
    }

    var content = node.nodeValue;
    var start = content.toLowerCase().indexOf(text);
    if (start < 0) {
      return;
    }
    var holder = jQuery(node.parentNode);
    if (holder.hasClass(className) || holder.hasClass("nohighlight")) {
      return;
    }

    var insideSvg = jQuery(node).closest("body, svg, foreignObject").is("svg");
    var wrapper;
    if (insideSvg) {
      wrapper = document.createElementNS(SVG_NS, "tspan");
    } else {
      wrapper = document.createElement("span");
      wrapper.className = className;
    }
    wrapper.appendChild(
      document.createTextNode(content.substr(start, text.length)));

    // Insert the trailing text after the original node, put the wrapper in
    // front of that trailing text, then shrink the original node to the
    // leading text.
    var parent = node.parentNode;
    var tail = parent.insertBefore(
      document.createTextNode(content.substr(start + text.length)),
      node.nextSibling);
    parent.insertBefore(wrapper, tail);
    node.nodeValue = content.substr(0, start);

    if (!insideSvg) {
      return;
    }
    // In SVG the highlight is a rect sized to the parent's bounding box; it
    // is queued and inserted after the traversal has finished.
    var rect = document.createElementNS(SVG_NS, "rect");
    var bbox = node.parentElement.getBBox();
    rect.x.baseVal.value = bbox.x;
    rect.y.baseVal.value = bbox.y;
    rect.width.baseVal.value = bbox.width;
    rect.height.baseVal.value = bbox.height;
    rect.setAttribute('class', className);
    queue.push({
      "parent": node.parentNode,
      "target": rect});
  }

  var result = this.each(function() {
    visit(this, pendingRects);
  });
  pendingRects.forEach(function(item) {
    jQuery(item.parent).before(item.target);
  });
  return result;
};
/*
 * Backward compatibility for jQuery.browser.
 * This will be supported until firefox bug is fixed.
 *
 * Installed only when jQuery.browser is missing: defines jQuery.uaMatch and
 * sets jQuery.browser[<detected name>] = true for the current user agent.
 */
if (!jQuery.browser) {
  jQuery.uaMatch = function(ua) {
    ua = ua.toLowerCase();

    // Tried in order; the first pattern that matches wins.
    var patterns = [
      /(chrome)[ \/]([\w.]+)/,
      /(webkit)[ \/]([\w.]+)/,
      /(opera)(?:.*version|)[ \/]([\w.]+)/,
      /(msie) ([\w.]+)/
    ];
    var found = null;
    for (var i = 0; i < patterns.length && !found; i++) {
      found = patterns[i].exec(ua);
    }
    // The mozilla pattern is only consulted when the UA string does not
    // contain "compatible".
    if (!found && ua.indexOf("compatible") < 0) {
      found = /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua);
    }
    if (!found) {
      found = [];
    }

    return {
      browser: found[1] || "",
      version: found[2] || "0"
    };
  };

  jQuery.browser = {};
  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}

925
docs/build/html/_static/basic.css vendored Normal file
View File

@@ -0,0 +1,925 @@
/*
* basic.css
* ~~~~~~~~~
*
* Sphinx stylesheet -- basic theme.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
/* -- main layout ----------------------------------------------------------- */
div.clearer {
clear: both;
}
div.section::after {
display: block;
content: '';
clear: left;
}
/* -- relbar ---------------------------------------------------------------- */
div.related {
width: 100%;
font-size: 90%;
}
div.related h3 {
display: none;
}
div.related ul {
margin: 0;
padding: 0 0 0 10px;
list-style: none;
}
div.related li {
display: inline;
}
div.related li.right {
float: right;
margin-right: 5px;
}
/* -- sidebar --------------------------------------------------------------- */
div.sphinxsidebarwrapper {
padding: 10px 5px 0 10px;
}
div.sphinxsidebar {
float: left;
width: 230px;
margin-left: -100%;
font-size: 90%;
word-wrap: break-word;
overflow-wrap : break-word;
}
div.sphinxsidebar ul {
list-style: none;
}
div.sphinxsidebar ul ul,
div.sphinxsidebar ul.want-points {
margin-left: 20px;
list-style: square;
}
div.sphinxsidebar ul ul {
margin-top: 0;
margin-bottom: 0;
}
div.sphinxsidebar form {
margin-top: 10px;
}
div.sphinxsidebar input {
border: 1px solid #98dbcc;
font-family: sans-serif;
font-size: 1em;
}
div.sphinxsidebar #searchbox form.search {
overflow: hidden;
}
div.sphinxsidebar #searchbox input[type="text"] {
float: left;
width: 80%;
padding: 0.25em;
box-sizing: border-box;
}
div.sphinxsidebar #searchbox input[type="submit"] {
float: left;
width: 20%;
border-left: none;
padding: 0.25em;
box-sizing: border-box;
}
img {
border: 0;
max-width: 100%;
}
/* -- search page ----------------------------------------------------------- */
ul.search {
margin: 10px 0 0 20px;
padding: 0;
}
ul.search li {
padding: 5px 0 5px 20px;
background-image: url(file.png);
background-repeat: no-repeat;
background-position: 0 7px;
}
ul.search li a {
font-weight: bold;
}
ul.search li p.context {
color: #888;
margin: 2px 0 0 30px;
text-align: left;
}
ul.keywordmatches li.goodmatch a {
font-weight: bold;
}
/* -- index page ------------------------------------------------------------ */
table.contentstable {
width: 90%;
margin-left: auto;
margin-right: auto;
}
table.contentstable p.biglink {
line-height: 150%;
}
a.biglink {
font-size: 1.3em;
}
span.linkdescr {
font-style: italic;
padding-top: 5px;
font-size: 90%;
}
/* -- general index --------------------------------------------------------- */
table.indextable {
width: 100%;
}
table.indextable td {
text-align: left;
vertical-align: top;
}
table.indextable ul {
margin-top: 0;
margin-bottom: 0;
list-style-type: none;
}
table.indextable > tbody > tr > td > ul {
padding-left: 0em;
}
table.indextable tr.pcap {
height: 10px;
}
table.indextable tr.cap {
margin-top: 10px;
background-color: #f2f2f2;
}
img.toggler {
margin-right: 3px;
margin-top: 3px;
cursor: pointer;
}
/* Both index jump boxes use the same declarations. */
div.modindex-jumpbox,
div.genindex-jumpbox {
    border-top: 1px solid #ddd;
    border-bottom: 1px solid #ddd;
    margin: 1em 0 1em 0;
    padding: 0.4em;
}
/* -- domain module index --------------------------------------------------- */
table.modindextable td {
padding: 2px;
border-collapse: collapse;
}
/* -- general body styles --------------------------------------------------- */
div.body {
min-width: 360px;
max-width: 800px;
}
div.body p, div.body dd, div.body li, div.body blockquote {
-moz-hyphens: auto;
-ms-hyphens: auto;
-webkit-hyphens: auto;
hyphens: auto;
}
a.headerlink {
visibility: hidden;
}
a:visited {
color: #551A8B;
}
h1:hover > a.headerlink,
h2:hover > a.headerlink,
h3:hover > a.headerlink,
h4:hover > a.headerlink,
h5:hover > a.headerlink,
h6:hover > a.headerlink,
dt:hover > a.headerlink,
caption:hover > a.headerlink,
p.caption:hover > a.headerlink,
div.code-block-caption:hover > a.headerlink {
visibility: visible;
}
div.body p.caption {
text-align: inherit;
}
div.body td {
text-align: left;
}
.first {
margin-top: 0 !important;
}
p.rubric {
margin-top: 30px;
font-weight: bold;
}
img.align-left, figure.align-left, .figure.align-left, object.align-left {
clear: left;
float: left;
margin-right: 1em;
}
img.align-right, figure.align-right, .figure.align-right, object.align-right {
clear: right;
float: right;
margin-left: 1em;
}
/* "center" and "default" alignment are rendered identically: a block box
   with automatic left and right margins. */
img.align-center,
figure.align-center,
.figure.align-center,
object.align-center,
img.align-default,
figure.align-default,
.figure.align-default {
    display: block;
    margin-left: auto;
    margin-right: auto;
}
.align-left {
text-align: left;
}
.align-center {
text-align: center;
}
.align-default {
text-align: center;
}
.align-right {
text-align: right;
}
/* -- sidebars -------------------------------------------------------------- */
div.sidebar,
aside.sidebar {
margin: 0 0 0.5em 1em;
border: 1px solid #ddb;
padding: 7px;
background-color: #ffe;
width: 40%;
float: right;
clear: right;
overflow-x: auto;
}
p.sidebar-title {
font-weight: bold;
}
nav.contents,
aside.topic,
div.admonition, div.topic, blockquote {
clear: left;
}
/* -- topics ---------------------------------------------------------------- */
nav.contents,
aside.topic,
div.topic {
border: 1px solid #ccc;
padding: 7px;
margin: 10px 0 10px 0;
}
p.topic-title {
font-size: 1.1em;
font-weight: bold;
margin-top: 10px;
}
/* -- admonitions ----------------------------------------------------------- */
div.admonition {
margin-top: 10px;
margin-bottom: 10px;
padding: 7px;
}
div.admonition dt {
font-weight: bold;
}
p.admonition-title {
margin: 0px 10px 5px 0px;
font-weight: bold;
}
div.body p.centered {
text-align: center;
margin-top: 25px;
}
/* -- content of sidebars/topics/admonitions -------------------------------- */
div.sidebar > :last-child,
aside.sidebar > :last-child,
nav.contents > :last-child,
aside.topic > :last-child,
div.topic > :last-child,
div.admonition > :last-child {
margin-bottom: 0;
}
div.sidebar::after,
aside.sidebar::after,
nav.contents::after,
aside.topic::after,
div.topic::after,
div.admonition::after,
blockquote::after {
display: block;
content: '';
clear: both;
}
/* -- tables ---------------------------------------------------------------- */
table.docutils {
margin-top: 10px;
margin-bottom: 10px;
border: 0;
border-collapse: collapse;
}
/* Centered and default-aligned tables both get automatic side margins. */
table.align-center,
table.align-default {
    margin-left: auto;
    margin-right: auto;
}
table caption span.caption-number {
font-style: italic;
}
table caption span.caption-text {
}
table.docutils td, table.docutils th {
padding: 1px 8px 1px 5px;
border-top: 0;
border-left: 0;
border-right: 0;
border-bottom: 1px solid #aaa;
}
th {
text-align: left;
padding-right: 5px;
}
table.citation {
border-left: solid 1px gray;
margin-left: 1px;
}
table.citation td {
border-bottom: none;
}
th > :first-child,
td > :first-child {
margin-top: 0px;
}
th > :last-child,
td > :last-child {
margin-bottom: 0px;
}
/* -- figures --------------------------------------------------------------- */
div.figure, figure {
margin: 0.5em;
padding: 0.5em;
}
div.figure p.caption, figcaption {
padding: 0.3em;
}
div.figure p.caption span.caption-number,
figcaption span.caption-number {
font-style: italic;
}
div.figure p.caption span.caption-text,
figcaption span.caption-text {
}
/* -- field list styles ----------------------------------------------------- */
table.field-list td, table.field-list th {
border: 0 !important;
}
.field-list ul {
margin: 0;
padding-left: 1em;
}
.field-list p {
margin: 0;
}
.field-name {
-moz-hyphens: manual;
-ms-hyphens: manual;
-webkit-hyphens: manual;
hyphens: manual;
}
/* -- hlist styles ---------------------------------------------------------- */
table.hlist {
margin: 1em 0;
}
table.hlist td {
vertical-align: top;
}
/* -- object description styles --------------------------------------------- */
.sig {
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
}
.sig-name, code.descname {
background-color: transparent;
font-weight: bold;
}
.sig-name {
font-size: 1.1em;
}
code.descname {
font-size: 1.2em;
}
.sig-prename, code.descclassname {
background-color: transparent;
}
.optional {
font-size: 1.3em;
}
.sig-paren {
font-size: larger;
}
.sig-param.n {
font-style: italic;
}
/* C++ specific styling */
.sig-inline.c-texpr,
.sig-inline.cpp-texpr {
font-family: unset;
}
.sig.c .k, .sig.c .kt,
.sig.cpp .k, .sig.cpp .kt {
color: #0033B3;
}
.sig.c .m,
.sig.cpp .m {
color: #1750EB;
}
.sig.c .s, .sig.c .sc,
.sig.cpp .s, .sig.cpp .sc {
color: #067D17;
}
/* -- other body styles ----------------------------------------------------- */
ol.arabic {
list-style: decimal;
}
ol.loweralpha {
list-style: lower-alpha;
}
ol.upperalpha {
list-style: upper-alpha;
}
ol.lowerroman {
list-style: lower-roman;
}
ol.upperroman {
list-style: upper-roman;
}
:not(li) > ol > li:first-child > :first-child,
:not(li) > ul > li:first-child > :first-child {
margin-top: 0px;
}
:not(li) > ol > li:last-child > :last-child,
:not(li) > ul > li:last-child > :last-child {
margin-bottom: 0px;
}
ol.simple ol p,
ol.simple ul p,
ul.simple ol p,
ul.simple ul p {
margin-top: 0;
}
ol.simple > li:not(:first-child) > p,
ul.simple > li:not(:first-child) > p {
margin-top: 0;
}
ol.simple p,
ul.simple p {
margin-bottom: 0;
}
aside.footnote > span,
div.citation > span {
float: left;
}
aside.footnote > span:last-of-type,
div.citation > span:last-of-type {
padding-right: 0.5em;
}
aside.footnote > p {
margin-left: 2em;
}
div.citation > p {
margin-left: 4em;
}
aside.footnote > p:last-of-type,
div.citation > p:last-of-type {
margin-bottom: 0em;
}
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
content: "";
clear: both;
}
dl.field-list {
display: grid;
grid-template-columns: fit-content(30%) auto;
}
dl.field-list > dt {
font-weight: bold;
word-break: break-word;
padding-left: 0.5em;
padding-right: 5px;
}
dl.field-list > dd {
padding-left: 0.5em;
margin-top: 0em;
margin-left: 0em;
margin-bottom: 0em;
}
dl {
margin-bottom: 15px;
}
dd > :first-child {
margin-top: 0px;
}
dd ul, dd table {
margin-bottom: 10px;
}
dd {
margin-top: 3px;
margin-bottom: 10px;
margin-left: 30px;
}
.sig dd {
margin-top: 0px;
margin-bottom: 0px;
}
.sig dl {
margin-top: 0px;
margin-bottom: 0px;
}
dl > dd:last-child,
dl > dd:last-child > :last-child {
margin-bottom: 0;
}
dt:target, span.highlighted {
background-color: #fbe54e;
}
rect.highlighted {
fill: #fbe54e;
}
dl.glossary dt {
font-weight: bold;
font-size: 1.1em;
}
.versionmodified {
font-style: italic;
}
.system-message {
background-color: #fda;
padding: 5px;
border: 3px solid red;
}
.footnote:target {
background-color: #ffa;
}
.line-block {
display: block;
margin-top: 1em;
margin-bottom: 1em;
}
.line-block .line-block {
margin-top: 0;
margin-bottom: 0;
margin-left: 1.5em;
}
.guilabel, .menuselection {
font-family: sans-serif;
}
.accelerator {
text-decoration: underline;
}
.classifier {
font-style: oblique;
}
.classifier:before {
font-style: normal;
margin: 0 0.5em;
content: ":";
display: inline-block;
}
abbr, acronym {
border-bottom: dotted 1px;
cursor: help;
}
/* Translation status markers: faint green tint for translated content,
   faint red tint for untranslated content. */
.translated {
    background-color: rgba(207, 255, 207, 0.2);
}

.untranslated {
    background-color: rgba(255, 207, 207, 0.2);
}
/* -- code displays --------------------------------------------------------- */
pre {
overflow: auto;
overflow-y: hidden; /* fixes display issues on Chrome browsers */
}
pre, div[class*="highlight-"] {
clear: both;
}
span.pre {
-moz-hyphens: none;
-ms-hyphens: none;
-webkit-hyphens: none;
hyphens: none;
white-space: nowrap;
}
div[class*="highlight-"] {
margin: 1em 0;
}
td.linenos pre {
border: 0;
background-color: transparent;
color: #aaa;
}
table.highlighttable {
display: block;
}
table.highlighttable tbody {
display: block;
}
table.highlighttable tr {
display: flex;
}
table.highlighttable td {
margin: 0;
padding: 0;
}
table.highlighttable td.linenos {
padding-right: 0.5em;
}
table.highlighttable td.code {
flex: 1;
overflow: hidden;
}
.highlight .hll {
display: block;
}
div.highlight pre,
table.highlighttable pre {
margin: 0;
}
div.code-block-caption + div {
margin-top: 0;
}
div.code-block-caption {
margin-top: 1em;
padding: 2px 5px;
font-size: small;
}
div.code-block-caption code {
background-color: transparent;
}
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp { /* gp: Generic.Prompt */
user-select: none;
-webkit-user-select: text; /* Safari fallback only */
-webkit-user-select: none; /* Chrome/Safari */
-moz-user-select: none; /* Firefox */
-ms-user-select: none; /* IE10+ */
}
div.code-block-caption span.caption-number {
padding: 0.1em 0.3em;
font-style: italic;
}
div.code-block-caption span.caption-text {
}
div.literal-block-wrapper {
margin: 1em 0;
}
code.xref, a code {
background-color: transparent;
font-weight: bold;
}
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
background-color: transparent;
}
.viewcode-link {
float: right;
}
.viewcode-back {
float: right;
font-family: sans-serif;
}
div.viewcode-block:target {
margin: -1px -10px;
padding: 0 10px;
}
/* -- math display ---------------------------------------------------------- */
img.math {
vertical-align: middle;
}
div.body div.math p {
text-align: center;
}
span.eqno {
float: right;
}
span.eqno a.headerlink {
position: absolute;
z-index: 1;
}
div.math:hover a.headerlink {
visibility: visible;
}
/* -- printout stylesheet --------------------------------------------------- */
@media print {
    /* Let the document wrappers span the full width, without margins. */
    div.document,
    div.documentwrapper,
    div.bodywrapper {
        margin: 0 !important;
        width: 100%;
    }

    /* Hide the sidebar, relbar, footer and top link in print output. */
    div.sphinxsidebar,
    div.related,
    div.footer,
    #top-link {
        display: none;
    }
}

View File

@@ -0,0 +1 @@
.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 434 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

4
docs/build/html/_static/css/theme.css vendored Normal file

File diff suppressed because one or more lines are too long

156
docs/build/html/_static/doctools.js vendored Normal file
View File

@@ -0,0 +1,156 @@
/*
* doctools.js
* ~~~~~~~~~~~
*
* Base JavaScript utilities for all Sphinx HTML documentation.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
// Tag names of elements for which keyboard shortcuts are skipped: the keydown
// handler installed by Documentation.initOnKeyListeners returns early when
// the tagName of document.activeElement is in this set.
const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
"TEXTAREA",
"INPUT",
"SELECT",
"BUTTON",
]);
/**
 * Run `callback` as soon as the DOM is usable: immediately when the document
 * is no longer in the "loading" state, otherwise on DOMContentLoaded.
 */
const _ready = (callback) => {
  if (document.readyState === "loading") {
    document.addEventListener("DOMContentLoaded", callback);
    return;
  }
  callback();
};
/**
 * Small JavaScript module for the documentation.
 *
 * Groups the i18n helpers (gettext, ngettext, addTranslations), the
 * search-bar focus helper, and the two page-initialisation hooks that
 * init() runs (domain index togglers and keyboard shortcuts).
 */
const Documentation = {
  /** Set up the domain index togglers and the keyboard listeners. */
  init: () => {
    Documentation.initDomainIndexTable();
    Documentation.initOnKeyListeners();
  },

  /**
   * i18n support
   */
  TRANSLATIONS: {},
  // Default plural rule: index 0 for n === 1, index 1 otherwise.
  PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
  LOCALE: "unknown",

  // gettext and ngettext don't access this so that the functions
  // can safely be bound to a different name (_ = Documentation.gettext)

  /**
   * Look up the translation of `string`.
   * Returns `string` itself when no translation is registered, and the
   * first entry when the catalog holds a (singular, plural) tuple.
   */
  gettext: (string) => {
    const translated = Documentation.TRANSLATIONS[string];
    switch (typeof translated) {
      case "undefined":
        return string; // no translation
      case "string":
        return translated; // translation exists
      default:
        return translated[0]; // (singular, plural) translation tuple exists
    }
  },

  /**
   * Plural-aware lookup keyed on `singular`.
   * With a catalog entry, returns the entry at index PLURAL_EXPR(n);
   * without one, returns `singular` for n === 1 and `plural` otherwise.
   */
  ngettext: (singular, plural, n) => {
    const translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated !== "undefined")
      return translated[Documentation.PLURAL_EXPR(n)];
    return n === 1 ? singular : plural;
  },

  /**
   * Merge `catalog.messages` into TRANSLATIONS and adopt the catalog's
   * plural rule and locale.
   */
  addTranslations: (catalog) => {
    Object.assign(Documentation.TRANSLATIONS, catalog.messages);
    // NOTE(review): catalog.plural_expr is compiled and executed as code;
    // only catalogs from a trusted source should be passed in.
    Documentation.PLURAL_EXPR = new Function(
      "n",
      `return (${catalog.plural_expr})`
    );
    Documentation.LOCALE = catalog.locale;
  },

  /**
   * helper function to focus on search bar
   */
  focusSearchBar: () => {
    // No-op when the page has no input[name=q].
    document.querySelectorAll("input[name=q]")[0]?.focus();
  },

  /**
   * Initialise the domain index toggle buttons
   */
  initDomainIndexTable: () => {
    const toggler = (el) => {
      // The rows controlled by a toggler carry the class "cg-<n>", where
      // <n> is the toggler's id with its first 7 characters removed.
      const idNumber = el.id.substr(7);
      const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
      if (el.src.substr(-9) === "minus.png") {
        // Currently expanded: swap to the plus icon and hide the rows.
        el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
        toggledRows.forEach((el) => (el.style.display = "none"));
      } else {
        // Currently collapsed: swap to the minus icon and show the rows.
        el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
        toggledRows.forEach((el) => (el.style.display = ""));
      }
    };

    const togglerElements = document.querySelectorAll("img.toggler");
    togglerElements.forEach((el) =>
      el.addEventListener("click", (event) => toggler(event.currentTarget))
    );
    togglerElements.forEach((el) => (el.style.display = ""));
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
  },

  /**
   * Install the keydown listener for prev/next navigation (ArrowLeft /
   * ArrowRight) and the "/" search shortcut, each gated by its option.
   */
  initOnKeyListeners: () => {
    // only install a listener if it is really needed
    if (
      !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
      !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
    )
      return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements; activeElement can be null, in which case
      // there is nothing to protect and the shortcut is processed
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement?.tagName)) return;
      // bail with special keys
      if (event.altKey || event.ctrlKey || event.metaKey) return;

      if (!event.shiftKey) {
        switch (event.key) {
          case "ArrowLeft": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const prevLink = document.querySelector('link[rel="prev"]');
            if (prevLink && prevLink.href) {
              window.location.href = prevLink.href;
              event.preventDefault();
            }
            break;
          }
          case "ArrowRight": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const nextLink = document.querySelector('link[rel="next"]');
            if (nextLink && nextLink.href) {
              window.location.href = nextLink.href;
              event.preventDefault();
            }
            break;
          }
        }
      }

      // some keyboard layouts may need Shift to get /
      switch (event.key) {
        case "/":
          if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
          Documentation.focusSearchBar();
          event.preventDefault();
      }
    });
  },
};
// quick alias for translations
const _ = Documentation.gettext;
// Run Documentation.init through _ready(): immediately if the document is no
// longer loading, otherwise on DOMContentLoaded.
_ready(Documentation.init);

View File

@@ -0,0 +1,13 @@
// Options object read by doctools.js through DOCUMENTATION_OPTIONS.* lookups.
const DOCUMENTATION_OPTIONS = {
VERSION: '1.0.0',
LANGUAGE: 'en',
// When truthy, doctools.js runs the toggler once on every img.toggler at init.
COLLAPSE_INDEX: false,
BUILDER: 'html',
FILE_SUFFIX: '.html',
LINK_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt',
// Gates the ArrowLeft / ArrowRight prev/next navigation in doctools.js.
NAVIGATION_WITH_KEYS: false,
SHOW_SEARCH_SUMMARY: true,
// Gates the "/" shortcut that focuses the search bar in doctools.js.
// The keydown listener is installed only if this or NAVIGATION_WITH_KEYS is truthy.
ENABLE_SEARCH_SHORTCUTS: true,
};

BIN
docs/build/html/_static/file.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 286 B

2
docs/build/html/_static/jquery.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});

View File

@@ -0,0 +1,4 @@
/**
* @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var 
b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof 
b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);

View File

@@ -0,0 +1,4 @@
/**
* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
/*
 * Reader's guide to the minified bundle below (comments only, code untouched).
 *
 * The outer IIFE receives (window-or-this, document) as (a, b) and:
 *   - runs a feature test that sets `k` ("hidden" is a property of a fresh <a>)
 *     and `l` (an unknown <xyz> element parsed via innerHTML yields one child
 *     node, i.e. unknown elements are supported); if the test throws, both
 *     are forced to true, which turns the shiv into a no-op;
 *   - builds the public object `t` (elements list, version, shivCSS,
 *     supportsUnknownElements, shivMethods, type, shivDocument, createElement,
 *     createDocumentFragment, addElements) and publishes it as `a.html5`;
 *     `elements`, `shivCSS` and `shivMethods` can be preset through an
 *     existing `a.html5` object;
 *   - immediately shivs the current document via j(b): the default block-level
 *     stylesheet is injected only when shivCSS is on and `k` is false, and
 *     document.createElement / createDocumentFragment are replaced only when
 *     `l` is false;
 *   - additionally assigns `t` to module.exports when a CommonJS `module`
 *     object is present.
 *
 * NOTE(review): the embedded version string is "3.7.3-pre" while the banner
 * above says 3.7.3 -- presumably an upstream packaging quirk; confirm before
 * relying on `html5.version`.
 */
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var 
a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);

1
docs/build/html/_static/js/theme.js vendored Normal file

File diff suppressed because one or more lines are too long

199
docs/build/html/_static/language_data.js vendored Normal file
View File

@@ -0,0 +1,199 @@
/*
* language_data.js
* ~~~~~~~~~~~~~~~~
*
* This script contains the language-specific data used by searchtools.js,
* namely the list of stopwords, stemmer, scorer and splitter.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
// English stopword list. Per the file header this is one of the
// language-specific data items consumed by searchtools.js; all entries are
// lower-case. NOTE(review): presumably these terms are dropped from queries
// before matching -- confirm against searchtools.js.
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
/* Non-minified version is copied as a separate JS file, if available */
/**
 * Porter Stemmer
 *
 * Constructor for an English suffix-stripping stemmer. An instance exposes a
 * single method, `stemWord(w)`, which takes a word and returns its stem.
 * Words shorter than three characters are returned unchanged.
 */
var Stemmer = function() {
  // Step 2 table: suffix -> replacement.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3 table: suffix -> replacement.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  // Regex source fragments.
  var c = "[^aeiou]";               // consonant
  var v = "[aeiouy]";               // vowel
  var C = c + "[^aeiouy]*";         // consonant sequence
  var V = v + "[aeiou]*";           // vowel sequence
  var leadC = "^(" + C + ")?";      // optional leading consonant sequence

  // Matchers are compiled once per Stemmer instance. None of them carries the
  // g or y flag, so sharing them across calls keeps no state between tests.
  var measureGt0 = new RegExp(leadC + V + C);                    // [C]VC... is m>0
  var measureEq1 = new RegExp(leadC + V + C + "(" + V + ")?$");  // [C]VC[V] is m=1
  var measureGt1 = new RegExp(leadC + V + C + V + C);            // [C]VCVC... is m>1
  var hasVowel = new RegExp(leadC + v);                          // vowel in stem
  var doubledConsonant = new RegExp("([^aeiouylsz])\\1$");
  var endsCvc = new RegExp("^" + C + v + "[^aeiouwxy]$");

  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;

  // Steps 2 and 3 share one shape: swap a listed suffix for its table entry,
  // but only when the stem in front of it has measure m > 0.
  function mapSuffix(word, pattern, table) {
    var parts = pattern.exec(word);
    if (parts && measureGt0.test(parts[1]))
      return parts[1] + table[parts[2]];
    return word;
  }

  this.stemWord = function (w) {
    if (w.length < 3)
      return w;

    var found;
    var stem;

    // An initial "y" is upper-cased for the duration of the algorithm so the
    // lower-case vowel/consonant classes do not treat it as a vowel.
    var startsWithY = w.charAt(0) == "y";
    if (startsWithY)
      w = "Y" + w.slice(1);

    // Step 1a
    found = /^(.+?)(ss|i)es$/.exec(w) || /^(.+?)([^s])s$/.exec(w);
    if (found)
      w = found[1] + found[2];

    // Step 1b
    found = /^(.+?)eed$/.exec(w);
    if (found) {
      if (measureGt0.test(found[1]))
        w = w.slice(0, -1);
    }
    else {
      found = /^(.+?)(ed|ing)$/.exec(w);
      if (found && hasVowel.test(found[1])) {
        w = found[1];
        if (/(at|bl|iz)$/.test(w))
          w += "e";
        else if (doubledConsonant.test(w))
          w = w.slice(0, -1);
        else if (endsCvc.test(w))
          w += "e";
      }
    }

    // Step 1c
    found = /^(.+?)y$/.exec(w);
    if (found && hasVowel.test(found[1]))
      w = found[1] + "i";

    // Step 2
    w = mapSuffix(w, step2Suffix, step2list);

    // Step 3
    w = mapSuffix(w, step3Suffix, step3list);

    // Step 4: the "(s|t)ion" rule is only consulted when no listed suffix matched.
    found = step4Suffix.exec(w);
    if (found) {
      stem = found[1];
    }
    else {
      found = /^(.+?)(s|t)(ion)$/.exec(w);
      if (found)
        stem = found[1] + found[2];
    }
    if (found && measureGt1.test(stem))
      w = stem;

    // Step 5
    found = /^(.+?)e$/.exec(w);
    if (found) {
      stem = found[1];
      if (measureGt1.test(stem) || (measureEq1.test(stem) && !endsCvc.test(stem)))
        w = stem;
    }
    if (/ll$/.test(w) && measureGt1.test(w))
      w = w.slice(0, -1);

    // and turn initial Y back to y
    if (startsWithY)
      w = "y" + w.slice(1);
    return w;
  };
};

BIN
docs/build/html/_static/minus.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 B

View File

@@ -0,0 +1,9 @@
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
<style>
svg { fill: lightcoral; }
@media (prefers-color-scheme: dark) {
svg { fill: crimson; }
}
</style>
<path d="m 80.856887,38.34474 h 2.903414 l 7.066397,13.332221 V 38.34474 h 2.092166 V 54.281494 H 90.01545 L 82.949053,40.949273 v 13.332221 h -2.092166 z m -13.812565,0 h 10.076555 v 1.814633 h -7.920343 v 4.718048 h 7.589439 v 1.814634 h -7.589439 v 5.774805 h 8.11248 v 1.814634 H 67.044322 Z m -14.335606,0 h 2.156212 v 6.735493 L 62.01672,38.34474 h 2.775323 l -7.909669,7.429324 8.475407,8.50743 h -2.839368 l -7.653485,-7.674833 v 7.674833 h -2.156212 z m -10.738365,1.462381 q -2.348349,0 -3.73601,1.750588 -1.376987,1.750588 -1.376987,4.771419 0,3.010158 1.376987,4.760746 1.387661,1.750587 3.73601,1.750587 2.34835,0 3.714663,-1.750587 1.376986,-1.750588 1.376986,-4.760746 0,-3.020831 -1.376986,-4.771419 -1.366313,-1.750588 -3.714663,-1.750588 z m 0,-1.750588 q 3.351736,0 5.358507,2.252281 2.006772,2.241606 2.006772,6.020314 0,3.768034 -2.006772,6.020315 -2.006771,2.241606 -5.358507,2.241606 -3.362409,0 -5.379855,-2.241606 -2.006772,-2.241607 -2.006772,-6.020315 0,-3.778708 2.006772,-6.020314 2.017446,-2.252281 5.379855,-2.252281 z m -14.100771,8.75294 q 0.693831,0.234835 1.344964,1.003385 0.661808,0.768551 1.323615,2.113515 l 2.188235,4.355121 H 30.410068 L 28.371273,50.193231 Q 27.581374,48.592083 26.834172,48.069042 26.097644,47.546 24.816726,47.546 h -2.34835 v 6.735494 H 20.312164 V 38.34474 h 4.867489 q 2.732625,0 4.077588,1.142152 1.344964,1.142151 1.344964,3.447804 0,1.505078 -0.704505,2.49779 -0.69383,0.992711 -2.02812,1.376987 z m -5.401204,-6.692797 v 5.657388 h 2.711277 q 1.55845,0 2.348349,-0.715179 0.800574,-0.725854 0.800574,-2.124189 0,-1.398336 -0.800574,-2.102841 -0.789899,-0.715179 -2.348349,-0.715179 z M 7.4709616,46.670707 v 5.838851 H 10.92944 q 1.739914,0 2.57251,-0.71518 0.843271,-0.725853 0.843271,-2.209583 0,-1.494404 -0.843271,-2.198909 -0.832596,-0.715179 -2.57251,-0.715179 z m 0,-6.554031 v 4.803443 h 3.1916204 q 1.579799,0 2.34835,-0.587088 0.779225,-0.597761 0.779225,-1.814634 0,-1.206197 -0.779225,-1.803959 -0.768551,-0.597762 
-2.34835,-0.597762 z M 5.3147497,38.34474 h 5.5079473 q 2.465767,0 3.800057,1.024734 1.334289,1.024734 1.334289,2.914088 0,1.462382 -0.683156,2.327001 -0.683156,0.86462 -2.006772,1.078106 1.590474,0.341579 2.465768,1.430359 0.885968,1.078106 0.885968,2.700602 0,2.134863 -1.451707,3.298364 -1.451707,1.1635 -4.130961,1.1635 H 5.3147497 Z M 65.365328,74.550032 H 67.52154 V 88.67217 h 7.760238 v 1.814634 h -9.91645 z m -6.447298,0 h 2.156214 V 90.486804 H 58.91803 Z m -9.955303,2.124192 -2.924765,7.931025 h 5.860208 z m -1.216872,-2.124192 h 2.444423 l 6.073692,15.936772 h -2.241606 l -1.451709,-4.088268 h -7.183824 l -1.451707,4.088268 H 41.66149 Z m -21.264514,0 h 2.903417 L 36.45116,87.882271 V 74.550032 h 2.092171 V 90.486804 H 35.639914 L 28.573506,77.154569 v 13.332235 h -2.092165 z m 53.454813,-9.781663 v 5.838858 h 3.458479 q 1.739919,0 2.572513,-0.71518 0.843272,-0.725854 0.843272,-2.209586 0,-1.494406 -0.843272,-2.198912 -0.832594,-0.71518 -2.572513,-0.71518 z m 0,-6.554037 v 4.803447 h 3.191622 q 1.579803,0 2.348355,-0.587087 0.779225,-0.597763 0.779225,-1.814636 0,-1.206199 -0.779225,-1.803962 -0.768552,-0.597762 -2.348355,-0.597762 z m -2.156212,-1.771939 h 5.507949 q 2.465772,0 3.800065,1.024736 1.334288,1.024735 1.334288,2.914092 0,1.462383 -0.683157,2.327003 -0.683157,0.864621 -2.006771,1.078107 1.590471,0.341579 2.46577,1.43036 0.885967,1.078107 0.885967,2.700605 0,2.134866 -1.451708,3.298368 -1.451709,1.163502 -4.130963,1.163502 h -5.72144 z m -18.86154,0 h 3.212969 l 4.066921,10.845118 4.088269,-10.845118 H 73.49953 V 72.379166 H 71.396692 V 58.385121 L 67.287075,69.315634 H 65.120183 L 61.010567,58.385121 v 13.994045 h -2.092165 z m -16.246333,0 h 2.166892 v 9.681616 q 0,2.561839 0.928668,3.693318 0.928667,1.120804 3.010159,1.120804 2.070819,0 2.999486,-1.120804 0.928667,-1.131479 0.928667,-3.693318 v -9.681616 h 2.166886 v 9.948474 q 0,3.116904 -1.547776,4.70738 -1.537104,1.590474 -4.547263,1.590474 -3.020833,0 -4.568615,-1.590474 
-1.537104,-1.590476 -1.537104,-4.70738 z m -16.192953,0 h 2.156213 v 6.532689 h 7.83496 v -6.532689 h 2.156213 v 15.936773 h -2.156213 v -7.589448 h -7.83496 v 7.589448 h -2.156213 z m -15.563168,0 h 13.481671 v 1.814636 H 18.740226 V 72.379166 H 16.573339 V 58.257029 h -5.657391 z" />
</svg>

After

Width:  |  Height:  |  Size: 4.4 KiB

View File

@@ -0,0 +1,259 @@
/* remove conflicting styling from Sphinx themes */
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt *,
div.nbinput.container div.input_area pre,
div.nboutput.container div.output_area pre,
div.nbinput.container div.input_area .highlight,
div.nboutput.container div.output_area .highlight {
border: none;
padding: 0;
margin: 0;
box-shadow: none;
}
div.nbinput.container > div[class*=highlight],
div.nboutput.container > div[class*=highlight] {
margin: 0;
}
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt * {
background: none;
}
div.nboutput.container div.output_area .highlight,
div.nboutput.container div.output_area pre {
background: unset;
}
div.nboutput.container div.output_area div.highlight {
color: unset; /* override Pygments text color */
}
/* avoid gaps between output lines */
div.nboutput.container div[class*=highlight] pre {
line-height: normal;
}
/* input/output containers */
div.nbinput.container,
div.nboutput.container {
display: -webkit-flex;
display: flex;
align-items: flex-start;
margin: 0;
width: 100%;
}
@media (max-width: 540px) {
div.nbinput.container,
div.nboutput.container {
flex-direction: column;
}
}
/* input container */
div.nbinput.container {
padding-top: 5px;
}
/* last container */
div.nblast.container {
padding-bottom: 5px;
}
/* input prompt */
div.nbinput.container div.prompt pre,
/* for sphinx_immaterial theme: */
div.nbinput.container div.prompt pre > code {
color: #307FC1;
}
/* output prompt */
div.nboutput.container div.prompt pre,
/* for sphinx_immaterial theme: */
div.nboutput.container div.prompt pre > code {
color: #BF5B3D;
}
/* all prompts */
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
    width: 4.5ex;
    padding-top: 5px;
    position: relative;
    /* Keep the "[n]:" prompt out of text selections so copying a cell does not
       pick it up. Safari only honours the -webkit- prefixed property, so both
       spellings are listed (prefixed first, standard last). */
    -webkit-user-select: none;
    user-select: none;
}
div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
position: absolute;
right: 0;
margin-right: 0.3ex;
}
@media (max-width: 540px) {
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
width: unset;
text-align: left;
padding: 0.4em;
}
div.nboutput.container div.prompt.empty {
padding: 0;
}
div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
position: unset;
}
}
/* disable scrollbars and line breaks on prompts */
div.nbinput.container div.prompt pre,
div.nboutput.container div.prompt pre {
overflow: hidden;
white-space: pre;
}
/* input/output area */
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
-webkit-flex: 1;
flex: 1;
overflow: auto;
}
@media (max-width: 540px) {
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
width: 100%;
}
}
/* input area */
div.nbinput.container div.input_area {
border: 1px solid #e0e0e0;
border-radius: 2px;
/*background: #f5f5f5;*/
}
/* override MathJax center alignment in output cells */
div.nboutput.container div[class*=MathJax] {
text-align: left !important;
}
/* override sphinx.ext.imgmath center alignment in output cells */
div.nboutput.container div.math p {
text-align: left;
}
/* standard error */
div.nboutput.container div.output_area.stderr {
background: #fdd;
}
/* ANSI colors */
.ansi-black-fg { color: #3E424D; }
.ansi-black-bg { background-color: #3E424D; }
.ansi-black-intense-fg { color: #282C36; }
.ansi-black-intense-bg { background-color: #282C36; }
.ansi-red-fg { color: #E75C58; }
.ansi-red-bg { background-color: #E75C58; }
.ansi-red-intense-fg { color: #B22B31; }
.ansi-red-intense-bg { background-color: #B22B31; }
.ansi-green-fg { color: #00A250; }
.ansi-green-bg { background-color: #00A250; }
.ansi-green-intense-fg { color: #007427; }
.ansi-green-intense-bg { background-color: #007427; }
.ansi-yellow-fg { color: #DDB62B; }
.ansi-yellow-bg { background-color: #DDB62B; }
.ansi-yellow-intense-fg { color: #B27D12; }
.ansi-yellow-intense-bg { background-color: #B27D12; }
.ansi-blue-fg { color: #208FFB; }
.ansi-blue-bg { background-color: #208FFB; }
.ansi-blue-intense-fg { color: #0065CA; }
.ansi-blue-intense-bg { background-color: #0065CA; }
.ansi-magenta-fg { color: #D160C4; }
.ansi-magenta-bg { background-color: #D160C4; }
.ansi-magenta-intense-fg { color: #A03196; }
.ansi-magenta-intense-bg { background-color: #A03196; }
.ansi-cyan-fg { color: #60C6C8; }
.ansi-cyan-bg { background-color: #60C6C8; }
.ansi-cyan-intense-fg { color: #258F8F; }
.ansi-cyan-intense-bg { background-color: #258F8F; }
.ansi-white-fg { color: #C5C1B4; }
.ansi-white-bg { background-color: #C5C1B4; }
.ansi-white-intense-fg { color: #A1A6B2; }
.ansi-white-intense-bg { background-color: #A1A6B2; }
.ansi-default-inverse-fg { color: #FFFFFF; }
.ansi-default-inverse-bg { background-color: #000000; }
.ansi-bold { font-weight: bold; }
.ansi-underline { text-decoration: underline; }
div.nbinput.container div.input_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight].math,
div.nboutput.container div.output_area.rendered_html,
div.nboutput.container div.output_area > div.output_javascript,
div.nboutput.container div.output_area:not(.rendered_html) > img{
padding: 5px;
margin: 0;
}
/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */
div.nbinput.container div.input_area > div[class^='highlight'],
div.nboutput.container div.output_area > div[class^='highlight']{
overflow-y: hidden;
}
/* hide copy button on prompts for 'sphinx_copybutton' extension ... */
.prompt .copybtn,
/* ... and 'sphinx_immaterial' theme */
.prompt .md-clipboard.md-icon {
display: none;
}
/* Some additional styling taken from the Jupyter notebook CSS */
.jp-RenderedHTMLCommon table,
div.rendered_html table {
border: none;
border-collapse: collapse;
border-spacing: 0;
color: black;
font-size: 12px;
table-layout: fixed;
}
.jp-RenderedHTMLCommon thead,
div.rendered_html thead {
border-bottom: 1px solid black;
vertical-align: bottom;
}
.jp-RenderedHTMLCommon tr,
.jp-RenderedHTMLCommon th,
.jp-RenderedHTMLCommon td,
div.rendered_html tr,
div.rendered_html th,
div.rendered_html td {
text-align: right;
vertical-align: middle;
padding: 0.5em 0.5em;
line-height: normal;
white-space: normal;
max-width: none;
border: none;
}
.jp-RenderedHTMLCommon th,
div.rendered_html th {
font-weight: bold;
}
.jp-RenderedHTMLCommon tbody tr:nth-child(odd),
div.rendered_html tbody tr:nth-child(odd) {
background: #f5f5f5;
}
.jp-RenderedHTMLCommon tbody tr:hover,
div.rendered_html tbody tr:hover {
background: rgba(66, 165, 245, 0.2);
}

View File

@@ -0,0 +1,31 @@
.nbsphinx-gallery {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
gap: 5px;
margin-top: 1em;
margin-bottom: 1em;
}
.nbsphinx-gallery > a {
padding: 5px;
border: 1px dotted currentColor;
border-radius: 2px;
text-align: center;
}
.nbsphinx-gallery > a:hover {
border-style: solid;
}
.nbsphinx-gallery img {
max-width: 100%;
max-height: 100%;
}
.nbsphinx-gallery > a > div:first-child {
display: flex;
align-items: start;
justify-content: center;
height: 120px;
margin-bottom: 5px;
}

View File

@@ -0,0 +1,9 @@
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100">
<style>
svg { fill: #ccc; }
@media (prefers-color-scheme: dark) {
svg { fill: #999; }
}
</style>
<path d="M 65.365328,74.550032 H 67.52154 V 88.67217 h 7.760238 v 1.814634 h -9.91645 z m -6.447298,0 h 2.156214 V 90.486804 H 58.91803 Z m -9.955303,2.124192 -2.924765,7.931025 h 5.860208 z m -1.216872,-2.124192 h 2.444423 l 6.073692,15.936772 h -2.241606 l -1.451709,-4.088268 h -7.183824 l -1.451707,4.088268 H 41.66149 Z m -21.264514,0 h 2.903417 L 36.45116,87.882271 V 74.550032 h 2.092171 V 90.486804 H 35.639914 L 28.573506,77.154569 v 13.332235 h -2.092165 z m 53.454813,-9.781663 v 5.838858 h 3.458479 q 1.739919,0 2.572513,-0.71518 0.843272,-0.725854 0.843272,-2.209586 0,-1.494406 -0.843272,-2.198912 -0.832594,-0.71518 -2.572513,-0.71518 z m 0,-6.554037 v 4.803447 h 3.191622 q 1.579803,0 2.348355,-0.587087 0.779225,-0.597763 0.779225,-1.814636 0,-1.206199 -0.779225,-1.803962 -0.768552,-0.597762 -2.348355,-0.597762 z m -2.156212,-1.771939 h 5.507949 q 2.465772,0 3.800065,1.024736 1.334288,1.024735 1.334288,2.914092 0,1.462383 -0.683157,2.327003 -0.683157,0.864621 -2.006771,1.078107 1.590471,0.341579 2.46577,1.43036 0.885967,1.078107 0.885967,2.700605 0,2.134866 -1.451708,3.298368 -1.451709,1.163502 -4.130963,1.163502 h -5.72144 z m -18.86154,0 h 3.212969 l 4.066921,10.845118 4.088269,-10.845118 H 73.49953 V 72.379166 H 71.396692 V 58.385121 L 67.287075,69.315634 H 65.120183 L 61.010567,58.385121 v 13.994045 h -2.092165 z m -16.246333,0 h 2.166892 v 9.681616 q 0,2.561839 0.928668,3.693318 0.928667,1.120804 3.010159,1.120804 2.070819,0 2.999486,-1.120804 0.928667,-1.131479 0.928667,-3.693318 v -9.681616 h 2.166886 v 9.948474 q 0,3.116904 -1.547776,4.70738 -1.537104,1.590474 -4.547263,1.590474 -3.020833,0 -4.568615,-1.590474 -1.537104,-1.590476 -1.537104,-4.70738 z m -16.192953,0 h 2.156213 v 6.532689 h 7.83496 v -6.532689 h 2.156213 v 15.936773 h -2.156213 v -7.589448 h -7.83496 v 7.589448 h -2.156213 z m -15.563168,0 h 13.481671 v 1.814636 H 18.740226 V 72.379166 H 16.573339 V 58.257029 H 10.915948 Z M 65.497127,39.809967 q -2.34835,0 -3.736011,1.750588 
-1.376987,1.750588 -1.376987,4.771419 0,3.010157 1.376987,4.760745 1.387661,1.750588 3.736011,1.750588 2.348349,0 3.714662,-1.750588 1.376986,-1.750588 1.376986,-4.760745 0,-3.020831 -1.376986,-4.771419 -1.366313,-1.750588 -3.714662,-1.750588 z m 0,-1.750588 q 3.351735,0 5.358506,2.252281 2.006772,2.241606 2.006772,6.020314 0,3.768034 -2.006772,6.020315 -2.006771,2.241606 -5.358506,2.241606 -3.36241,0 -5.379856,-2.241606 -2.006771,-2.241607 -2.006771,-6.020315 0,-3.778708 2.006771,-6.020314 2.017446,-2.252281 5.379856,-2.252281 z m -22.821688,0.288206 h 2.903414 L 52.64525,51.679807 V 38.347585 h 2.092166 V 54.28434 H 51.834001 L 44.767605,40.952119 V 54.28434 h -2.092166 z" />
</svg>

After

Width:  |  Height:  |  Size: 2.8 KiB

BIN
docs/build/html/_static/plus.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 B

75
docs/build/html/_static/pygments.css vendored Normal file
View File

@@ -0,0 +1,75 @@
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight { background: #f8f8f8; }
.highlight .c { color: #3D7B7B; font-style: italic } /* Comment */
.highlight .err { border: 1px solid #FF0000 } /* Error */
.highlight .k { color: #008000; font-weight: bold } /* Keyword */
.highlight .o { color: #666666 } /* Operator */
.highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #9C6500 } /* Comment.Preproc */
.highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
.highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #E40000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #008400 } /* Generic.Inserted */
.highlight .go { color: #717171 } /* Generic.Output */
.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight .gt { color: #0044DD } /* Generic.Traceback */
.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008000 } /* Keyword.Pseudo */
.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #B00040 } /* Keyword.Type */
.highlight .m { color: #666666 } /* Literal.Number */
.highlight .s { color: #BA2121 } /* Literal.String */
.highlight .na { color: #687822 } /* Name.Attribute */
.highlight .nb { color: #008000 } /* Name.Builtin */
.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
.highlight .no { color: #880000 } /* Name.Constant */
.highlight .nd { color: #AA22FF } /* Name.Decorator */
.highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */
.highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0000FF } /* Name.Function */
.highlight .nl { color: #767600 } /* Name.Label */
.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #19177C } /* Name.Variable */
.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #666666 } /* Literal.Number.Bin */
.highlight .mf { color: #666666 } /* Literal.Number.Float */
.highlight .mh { color: #666666 } /* Literal.Number.Hex */
.highlight .mi { color: #666666 } /* Literal.Number.Integer */
.highlight .mo { color: #666666 } /* Literal.Number.Oct */
.highlight .sa { color: #BA2121 } /* Literal.String.Affix */
.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
.highlight .sc { color: #BA2121 } /* Literal.String.Char */
.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */
.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
.highlight .s2 { color: #BA2121 } /* Literal.String.Double */
.highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
.highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
.highlight .sx { color: #008000 } /* Literal.String.Other */
.highlight .sr { color: #A45A77 } /* Literal.String.Regex */
.highlight .s1 { color: #BA2121 } /* Literal.String.Single */
.highlight .ss { color: #19177C } /* Literal.String.Symbol */
.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0000FF } /* Name.Function.Magic */
.highlight .vc { color: #19177C } /* Name.Variable.Class */
.highlight .vg { color: #19177C } /* Name.Variable.Global */
.highlight .vi { color: #19177C } /* Name.Variable.Instance */
.highlight .vm { color: #19177C } /* Name.Variable.Magic */
.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */

619
docs/build/html/_static/searchtools.js vendored Normal file
View File

@@ -0,0 +1,619 @@
/*
* searchtools.js
* ~~~~~~~~~~~~~~~~
*
* Sphinx JavaScript utilities for the full-text search.
*
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
/**
* Simple result scoring code.
*
* The default `Scorer` table is only defined when no `Scorer` global exists
* yet at the time this script runs, so a script loaded earlier can provide
* its own table. All entries are plain numeric weights; the optional `score`
* hook is shown commented out as a template.
*/
if (typeof Scorer === "undefined") {
var Scorer = {
// Implement the following function to further tweak the score for each result
// The function takes a result array [docname, title, anchor, descr, score, filename]
// and returns the new score.
/*
score: result => {
const [docname, title, anchor, descr, score, filename] = result
return score
},
*/
// query matches the full name of an object
objNameMatch: 11,
// or matches in the last dotted part of the object name
objPartialMatch: 6,
// Additive scores depending on the priority of the object
// (keys are priority values, values are the amount added to the score)
objPrio: {
0: 15, // used to be importantResults
1: 5, // used to be objectResults
2: -5, // used to be unimportantResults
},
// Used when the priority is not in the mapping.
objPrioDefault: 0,
// query found in title
title: 15,
partialTitle: 7,
// query found in terms
term: 5,
partialTerm: 2,
};
}
/**
 * Detach every child node of `element`, last child first.
 * A falsy `element` is ignored.
 */
const _removeChildren = (element) => {
  if (!element) return;
  for (let child = element.lastChild; child; child = element.lastChild) {
    element.removeChild(child);
  }
};
/**
 * Backslash-escape every character of `string` that is special in a regular
 * expression, so the result can be embedded in a pattern as literal text.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
 */
const _escapeRegExp = (string) => {
  const specialChars = /[.*+\-?^${}()|[\]\\]/g;
  // Prefix each special character with a single backslash.
  return string.replace(specialChars, (matched) => "\\" + matched);
};
/**
 * Render one search result as an <li> and append it to `Search.output`.
 *
 * `item` is a result array [docname, title, anchor, descr, score, filename].
 * When the result has a description it is shown inline; otherwise, if
 * DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY is set, the target page is
 * fetched and a summary built by `Search.makeSearchSummary` is appended once
 * the response arrives. `highlightTerms` are highlighted inside the list
 * item when SPHINX_HIGHLIGHT_ENABLED is truthy.
 */
const _displayItem = (item, searchTerms, highlightTerms) => {
  const builder = DOCUMENTATION_OPTIONS.BUILDER;
  const fileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
  const linkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
  const wantSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;
  const contentRoot = document.documentElement.dataset.content_root;

  const [docName, title, anchor, descr, score, _filename] = item;

  const listItem = document.createElement("li");

  // Shared by the description and the summary path.
  const highlightListItem = () => {
    if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
      highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
  };

  let requestUrl;
  let linkUrl;
  if (builder === "dirhtml") {
    // dirhtml builder: documents live at "<docname>/", and ".../index/"
    // collapses to its parent directory.
    let dirname = docName + "/";
    if (/\/index\/$/.test(dirname)) {
      dirname = dirname.slice(0, -6);
    } else if (dirname === "index/") {
      dirname = "";
    }
    requestUrl = contentRoot + dirname;
    linkUrl = requestUrl;
  } else {
    // normal html builders
    requestUrl = contentRoot + docName + fileSuffix;
    linkUrl = docName + linkSuffix;
  }

  const linkEl = document.createElement("a");
  listItem.appendChild(linkEl);
  linkEl.href = linkUrl + anchor;
  linkEl.dataset.score = score;
  linkEl.innerHTML = title;

  if (descr) {
    const descrEl = document.createElement("span");
    listItem.appendChild(descrEl);
    descrEl.innerHTML = " (" + descr + ")";
    // highlight search terms in the description
    highlightListItem();
  } else if (wantSummary) {
    fetch(requestUrl)
      .then((response) => response.text())
      .then((pageText) => {
        if (pageText) {
          listItem.appendChild(
            Search.makeSearchSummary(pageText, searchTerms, anchor)
          );
        }
        // highlight search terms in the summary
        highlightListItem();
      });
  }

  Search.output.appendChild(listItem);
};
/**
 * Stop the progress pulse and write the final title and status line.
 *
 * A falsy `resultCount` produces the "no documents matched" message;
 * otherwise the count is substituted into the "found N page(s)" message.
 */
const _finishSearch = (resultCount) => {
  Search.stopPulse();
  Search.title.innerText = _("Search Results");
  Search.status.innerText = resultCount
    ? _(
        "Search finished, found ${resultCount} page(s) matching the search query."
      ).replace('${resultCount}', resultCount)
    : Documentation.gettext(
        "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
      );
};
/**
 * Display the pending results one at a time.
 *
 * Each call pops one entry off `results`, renders it, and schedules itself
 * again 5 ms later; once `results` is empty the search is finalised with
 * `_finishSearch(resultCount)`.
 */
const _displayNextItem = (
  results,
  resultCount,
  searchTerms,
  highlightTerms,
) => {
  // search finished, update title and status message
  if (!results.length) {
    _finishSearch(resultCount);
    return;
  }
  // results left, load the summary and display it
  // this is intended to be dynamic (don't sub resultsCount)
  const nextResult = results.pop();
  _displayItem(nextResult, searchTerms, highlightTerms);
  const showRest = () =>
    _displayNextItem(results, resultCount, searchTerms, highlightTerms);
  setTimeout(showRest, 5);
};
// Comparator used by query() to order search results.
// Both arguments are arrays of [docname, title, anchor, descr, score, filename].
// Lower scores sort first and, among equal scores, titles sort in reverse
// alphabetical order (case-insensitively): `_displayNextItem` takes items
// with pop(), so the best match has to sit at the end of the array.
const _orderResultsByScoreThenName = (a, b) => {
  if (a[4] !== b[4]) return a[4] > b[4] ? 1 : -1;

  // same score: sort alphabetically
  const titleA = a[1].toLowerCase();
  const titleB = b[1].toLowerCase();
  if (titleA === titleB) return 0;
  return titleA > titleB ? -1 : 1; // inverted is intentional
};
/**
 * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
 * custom function per language.
 *
 * The query is cut at every run of characters that are not Unicode letters,
 * numbers, underscores, or emoji characters, and empty pieces are discarded.
 * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
 */
if (typeof splitQuery === "undefined") {
  var splitQuery = (query) => {
    const separators = /[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
    // A leading or trailing separator leaves an empty string behind; drop those.
    return query.split(separators).filter(Boolean);
  };
}
/**
* Search Module
*/
const Search = {
_index: null,
_queued_query: null,
_pulse_status: -1,
htmlToText: (htmlString, anchor) => {
const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
for (const removalQuery of [".headerlinks", "script", "style"]) {
htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() });
}
if (anchor) {
const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`);
if (anchorContent) return anchorContent.textContent;
console.warn(
`Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.`
);
}
// if anchor not specified or not found, fall back to main content
const docContent = htmlElement.querySelector('[role="main"]');
if (docContent) return docContent.textContent;
console.warn(
"Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template."
);
return "";
},
init: () => {
const query = new URLSearchParams(window.location.search).get("q");
document
.querySelectorAll('input[name="q"]')
.forEach((el) => (el.value = query));
if (query) Search.performSearch(query);
},
loadIndex: (url) =>
(document.body.appendChild(document.createElement("script")).src = url),
setIndex: (index) => {
Search._index = index;
if (Search._queued_query !== null) {
const query = Search._queued_query;
Search._queued_query = null;
Search.query(query);
}
},
hasIndex: () => Search._index !== null,
deferQuery: (query) => (Search._queued_query = query),
stopPulse: () => (Search._pulse_status = -1),
startPulse: () => {
if (Search._pulse_status >= 0) return;
const pulse = () => {
Search._pulse_status = (Search._pulse_status + 1) % 4;
Search.dots.innerText = ".".repeat(Search._pulse_status);
if (Search._pulse_status >= 0) window.setTimeout(pulse, 500);
};
pulse();
},
/**
* perform a search for something (or wait until index is loaded)
*/
performSearch: (query) => {
// create the required interface elements
const searchText = document.createElement("h2");
searchText.textContent = _("Searching");
const searchSummary = document.createElement("p");
searchSummary.classList.add("search-summary");
searchSummary.innerText = "";
const searchList = document.createElement("ul");
searchList.classList.add("search");
const out = document.getElementById("search-results");
Search.title = out.appendChild(searchText);
Search.dots = Search.title.appendChild(document.createElement("span"));
Search.status = out.appendChild(searchSummary);
Search.output = out.appendChild(searchList);
const searchProgress = document.getElementById("search-progress");
// Some themes don't use the search progress node
if (searchProgress) {
searchProgress.innerText = _("Preparing search...");
}
Search.startPulse();
// index already loaded, the browser was quick!
if (Search.hasIndex()) Search.query(query);
else Search.deferQuery(query);
},
_parseQuery: (query) => {
// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
const searchTerms = new Set();
const excludedTerms = new Set();
const highlightTerms = new Set();
const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
splitQuery(query.trim()).forEach((queryTerm) => {
const queryTermLower = queryTerm.toLowerCase();
// maybe skip this "word"
// stopwords array is from language_data.js
if (
stopwords.indexOf(queryTermLower) !== -1 ||
queryTerm.match(/^\d+$/)
)
return;
// stem the word
let word = stemmer.stemWord(queryTermLower);
// select the correct list
if (word[0] === "-") excludedTerms.add(word.substr(1));
else {
searchTerms.add(word);
highlightTerms.add(queryTermLower);
}
});
if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js
localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" "))
}
// console.debug("SEARCH: searching for:");
// console.info("required: ", [...searchTerms]);
// console.info("excluded: ", [...excludedTerms]);
return [query, searchTerms, excludedTerms, highlightTerms, objectTerms];
},
/**
* execute search (requires search index to be loaded)
*
* Gathers matches from four sources -- section titles, explicit index
* entries, object names and full-text terms -- then scores, sorts and
* de-duplicates them.
*
* query: the query text; it is lowercased and trimmed here before being
*   compared against titles and index entries.
* searchTerms, excludedTerms: forwarded to Search.performTermsSearch().
* highlightTerms: not read by this function.
* objectTerms: each term is looked up with Search.performObjectSearch().
*
* Returns an array of [docname, title, anchor, descr, score, filename] rows
* in ascending order of relevance: non-main index entries first, then all
* other results, each group sorted by _orderResultsByScoreThenName. The
* best match is therefore the LAST element.
*/
_performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => {
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;
const indexEntries = Search._index.indexentries;
// Collect multiple result groups to be sorted separately and then ordered.
// Each is an array of [docname, title, anchor, descr, score, filename].
const normalResults = [];
const nonMainIndexResults = [];
// empty the progress node (NOTE(review): _removeChildren is defined outside
// this block; confirm it tolerates a missing "search-progress" element)
_removeChildren(document.getElementById("search-progress"));
const queryLower = query.toLowerCase().trim();
// search in section titles: the title must contain the query, and the query
// must be at least half as long as the title
for (const [title, foundTitles] of Object.entries(allTitles)) {
if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) {
for (const [file, id] of foundTitles) {
// score is the percentage of the title covered by the query
let score = Math.round(100 * queryLower.length / title.length)
normalResults.push([
docNames[file],
// prefix with the document title when the match is a sub-section
titles[file] !== title ? `${titles[file]} > ${title}` : title,
id !== null ? "#" + id : "",
null,
score,
filenames[file],
]);
}
}
}
// search for explicit entries in index directives
for (const [entry, foundEntries] of Object.entries(indexEntries)) {
if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
for (const [file, id, isMain] of foundEntries) {
const score = Math.round(100 * queryLower.length / entry.length);
const result = [
docNames[file],
titles[file],
id ? "#" + id : "",
null,
score,
filenames[file],
];
if (isMain) {
normalResults.push(result);
} else {
nonMainIndexResults.push(result);
}
}
}
}
// lookup as object
objectTerms.forEach((term) =>
normalResults.push(...Search.performObjectSearch(term, objectTerms))
);
// lookup as search terms in fulltext
normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms));
// let the scorer override scores with a custom scoring function
if (Scorer.score) {
normalResults.forEach((item) => (item[4] = Scorer.score(item)));
nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item)));
}
// Sort each group of results by score and then alphabetically by name.
normalResults.sort(_orderResultsByScoreThenName);
nonMainIndexResults.sort(_orderResultsByScoreThenName);
// Combine the result groups in (reverse) order.
// Non-main index entries are typically arbitrary cross-references,
// so display them after other results.
let results = [...nonMainIndexResults, ...normalResults];
// remove duplicate search results
// note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
let seen = new Set();
results = results.reverse().reduce((acc, result) => {
// identity of a result is everything except its score (index 4)
let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
if (!seen.has(resultStr)) {
acc.push(result);
seen.add(resultStr);
}
return acc;
}, []);
// restore ascending order so that the best result is last again
return results.reverse();
},
query: (query) => {
const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query);
const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms);
// for debugging
//Search.lastresults = results.slice(); // a copy
// console.info("search results:", Search.lastresults);
// print the results
_displayNextItem(results, results.length, searchTerms, highlightTerms);
},
/**
* search for object names
*/
performObjectSearch: (object, objectTerms) => {
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const objects = Search._index.objects;
const objNames = Search._index.objnames;
const titles = Search._index.titles;
const results = [];
const objectSearchCallback = (prefix, match) => {
const name = match[4]
const fullname = (prefix ? prefix + "." : "") + name;
const fullnameLower = fullname.toLowerCase();
if (fullnameLower.indexOf(object) < 0) return;
let score = 0;
const parts = fullnameLower.split(".");
// check for different match types: exact matches of full name or
// "last name" (i.e. last dotted part)
if (fullnameLower === object || parts.slice(-1)[0] === object)
score += Scorer.objNameMatch;
else if (parts.slice(-1)[0].indexOf(object) > -1)
score += Scorer.objPartialMatch; // matches in last name
const objName = objNames[match[1]][2];
const title = titles[match[0]];
// If more than one term searched for, we require other words to be
// found in the name/title/description
const otherTerms = new Set(objectTerms);
otherTerms.delete(object);
if (otherTerms.size > 0) {
const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
if (
[...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
)
return;
}
let anchor = match[3];
if (anchor === "") anchor = fullname;
else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;
const descr = objName + _(", in ") + title;
// add custom score for some objects according to scorer
if (Scorer.objPrio.hasOwnProperty(match[2]))
score += Scorer.objPrio[match[2]];
else score += Scorer.objPrioDefault;
results.push([
docNames[match[0]],
fullname,
"#" + anchor,
descr,
score,
filenames[match[0]],
]);
};
Object.keys(objects).forEach((prefix) =>
objects[prefix].forEach((array) =>
objectSearchCallback(prefix, array)
)
);
return results;
},
/**
* search for full-text terms in the index
*/
performTermsSearch: (searchTerms, excludedTerms) => {
// prepare search
const terms = Search._index.terms;
const titleTerms = Search._index.titleterms;
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const scoreMap = new Map();
const fileMap = new Map();
// perform the search on the required terms
searchTerms.forEach((word) => {
const files = [];
const arr = [
{ files: terms[word], score: Scorer.term },
{ files: titleTerms[word], score: Scorer.title },
];
// add support for partial matches
if (word.length > 2) {
const escapedWord = _escapeRegExp(word);
if (!terms.hasOwnProperty(word)) {
Object.keys(terms).forEach((term) => {
if (term.match(escapedWord))
arr.push({ files: terms[term], score: Scorer.partialTerm });
});
}
if (!titleTerms.hasOwnProperty(word)) {
Object.keys(titleTerms).forEach((term) => {
if (term.match(escapedWord))
arr.push({ files: titleTerms[term], score: Scorer.partialTitle });
});
}
}
// no match but word was a required one
if (arr.every((record) => record.files === undefined)) return;
// found search word in contents
arr.forEach((record) => {
if (record.files === undefined) return;
let recordFiles = record.files;
if (recordFiles.length === undefined) recordFiles = [recordFiles];
files.push(...recordFiles);
// set score for the word in each file
recordFiles.forEach((file) => {
if (!scoreMap.has(file)) scoreMap.set(file, {});
scoreMap.get(file)[word] = record.score;
});
});
// create the mapping
files.forEach((file) => {
if (!fileMap.has(file)) fileMap.set(file, [word]);
else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word);
});
});
// now check if the files don't contain excluded terms
const results = [];
for (const [file, wordList] of fileMap) {
// check if all requirements are matched
// as search terms with length < 3 are discarded
const filteredTermCount = [...searchTerms].filter(
(term) => term.length > 2
).length;
if (
wordList.length !== searchTerms.size &&
wordList.length !== filteredTermCount
)
continue;
// ensure that none of the excluded terms is in the search result
if (
[...excludedTerms].some(
(term) =>
terms[term] === file ||
titleTerms[term] === file ||
(terms[term] || []).includes(file) ||
(titleTerms[term] || []).includes(file)
)
)
break;
// select one (max) score for the file.
const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w]));
// add result to the result list
results.push([
docNames[file],
titles[file],
"",
null,
score,
filenames[file],
]);
}
return results;
},
/**
* helper function to return a node containing the
* search summary for a given text. keywords is a list
* of stemmed words.
*/
makeSearchSummary: (htmlText, keywords, anchor) => {
const text = Search.htmlToText(htmlText, anchor);
if (text === "") return null;
const textLower = text.toLowerCase();
const actualStartPosition = [...keywords]
.map((k) => textLower.indexOf(k.toLowerCase()))
.filter((i) => i > -1)
.slice(-1)[0];
const startWithContext = Math.max(actualStartPosition - 120, 0);
const top = startWithContext === 0 ? "" : "...";
const tail = startWithContext + 240 < text.length ? "..." : "";
let summary = document.createElement("p");
summary.classList.add("context");
summary.textContent = top + text.substr(startWithContext, 240).trim() + tail;
return summary;
},
};
_ready(Search.init);

View File

@@ -0,0 +1,154 @@
/* Highlighting utilities for Sphinx HTML documentation. */
"use strict";
// Feature switch for search-term highlighting. It is checked by
// SphinxHighlight.highlightSearchWords() in this file and by
// Search._parseQuery() in searchtools.js before terms are stored.
const SPHINX_HIGHLIGHT_ENABLED = true
/**
* highlight a given string on a node by wrapping it in
* span elements with the given class name.
*
* node: the DOM node to process; element nodes are walked recursively.
* addItems: output array; for matches inside an SVG a
*   { parent, target } record is pushed, where target is a <rect> that the
*   caller still has to insert into the document.
* text: the string to look for. The node value is lowercased before the
*   lookup, so text has to be lowercase itself in order to match.
* className: class put on the wrapping span (or on the SVG rect).
*/
const _highlight = (node, addItems, text, className) => {
if (node.nodeType === Node.TEXT_NODE) {
const val = node.nodeValue;
const parent = node.parentNode;
const pos = val.toLowerCase().indexOf(text);
// skip text that is already highlighted or explicitly opted out
if (
pos >= 0 &&
!parent.classList.contains(className) &&
!parent.classList.contains("nohighlight")
) {
let span;
// the nearest of body/svg/foreignObject tells whether we are in SVG content
const closestNode = parent.closest("body, svg, foreignObject");
const isInSVG = closestNode && closestNode.matches("svg");
if (isInSVG) {
// in SVG the match is wrapped in a <tspan>; the class goes on the rect below
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.classList.add(className);
}
// split the text node into: [text before] [wrapped match] [rest]
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
const rest = document.createTextNode(val.substr(pos + text.length));
// insert `rest` after the original node, then the span in front of `rest`
parent.insertBefore(
span,
parent.insertBefore(
rest,
node.nextSibling
)
);
// the original node keeps only the text before the match
node.nodeValue = val.substr(0, pos);
/* There may be more occurrences of search term in this node. So call this
* function recursively on the remaining fragment.
*/
_highlight(rest, addItems, text, className);
if (isInSVG) {
// build a rect with the size of the parent's bounding box that carries
// the highlight class; it is only queued here, not inserted
const rect = document.createElementNS(
"http://www.w3.org/2000/svg",
"rect"
);
const bbox = parent.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute("class", className);
addItems.push({ parent: parent, target: rect });
}
}
// nodes without matches() are skipped, and so is the content of form controls
} else if (node.matches && !node.matches("button, select, textarea")) {
node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
}
};
// Highlight every occurrence of `text` below `thisNode`, then insert the
// SVG rectangles that _highlight() queued, each directly before its
// recorded parent element.
const _highlightText = (thisNode, text, className) => {
  const queuedRects = [];
  _highlight(thisNode, queuedRects, text, className);
  for (const { parent, target } of queuedRects) {
    parent.insertAdjacentElement("beforebegin", target);
  }
};
/**
 * Small JavaScript module for the documentation: highlights the terms of
 * the last search on the current page and lets the reader hide them again.
 */
const SphinxHighlight = {
  /**
   * Highlight the search words on the page. The words are taken from
   * localStorage ("sphinx_highlight_terms") or, failing that, from the
   * "highlight" URL parameter; both sources are cleared afterwards.
   */
  highlightSearchWords: () => {
    if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight

    // read the terms, then remove them from localStorage and from the URL
    const url = new URL(window.location);
    const storedTerms = localStorage.getItem("sphinx_highlight_terms");
    const highlight = storedTerms || url.searchParams.get("highlight") || "";
    localStorage.removeItem("sphinx_highlight_terms");
    url.searchParams.delete("highlight");
    window.history.replaceState({}, "", url);

    // individual, lowercased terms
    const terms = highlight.toLowerCase().split(/\s+/).filter((term) => term);
    if (!terms.length) return; // nothing to do

    // prefer the first "div.body", fall back to <body>
    const body = document.querySelector("div.body") || document.querySelector("body");
    window.setTimeout(() => {
      for (const term of terms) _highlightText(body, term, "highlighted");
    }, 10);

    // offer a link to remove the highlighting again
    const searchBox = document.getElementById("searchbox");
    if (searchBox === null) return;
    const hideLinkMarkup =
      '<p class="highlight-link">' +
      '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
      _("Hide Search Matches") +
      "</a></p>";
    const hideLink = document.createRange().createContextualFragment(hideLinkMarkup);
    searchBox.appendChild(hideLink);
  },

  /**
   * Remove the "hide" link and the highlight class from all marked spans,
   * and forget the stored terms.
   */
  hideSearchWords: () => {
    for (const link of document.querySelectorAll("#searchbox .highlight-link")) {
      link.remove();
    }
    for (const marked of document.querySelectorAll("span.highlighted")) {
      marked.classList.remove("highlighted");
    }
    localStorage.removeItem("sphinx_highlight_terms");
  },

  /**
   * Install a keydown listener that hides the search marks when Escape is
   * pressed. Only installed when search shortcuts are enabled.
   */
  initEscapeListener: () => {
    // only install a listener if it is really needed
    if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;

    const onKeyDown = (event) => {
      // ignore keys typed into input-like elements
      const focusedTag = document.activeElement.tagName;
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(focusedTag)) return;

      // ignore key combinations
      const hasModifier =
        event.shiftKey || event.altKey || event.ctrlKey || event.metaKey;
      if (hasModifier) return;

      const isEscape = event.key === "Escape";
      if (!(DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && isEscape)) return;

      SphinxHighlight.hideSearchWords();
      event.preventDefault();
    };
    document.addEventListener("keydown", onKeyDown);
  },
};
_ready(() => {
  // On the search page (where `Search` is defined) highlighting is skipped:
  // it would mark the words of the *previous* search query.
  const onSearchPage = typeof Search !== "undefined";
  if (!onSearchPage) SphinxHighlight.highlightSearchWords();

  SphinxHighlight.initEscapeListener();
});

407
docs/build/html/genindex.html vendored Normal file
View File

@@ -0,0 +1,407 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=8d563738"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="#" />
<link rel="search" title="Search" href="search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Index</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1 id="index">Index</h1>
<div class="genindex-jumpbox">
<a href="#A"><strong>A</strong></a>
| <a href="#C"><strong>C</strong></a>
| <a href="#D"><strong>D</strong></a>
| <a href="#E"><strong>E</strong></a>
| <a href="#G"><strong>G</strong></a>
| <a href="#H"><strong>H</strong></a>
| <a href="#I"><strong>I</strong></a>
| <a href="#L"><strong>L</strong></a>
| <a href="#M"><strong>M</strong></a>
| <a href="#N"><strong>N</strong></a>
| <a href="#P"><strong>P</strong></a>
| <a href="#R"><strong>R</strong></a>
| <a href="#S"><strong>S</strong></a>
| <a href="#T"><strong>T</strong></a>
| <a href="#U"><strong>U</strong></a>
| <a href="#V"><strong>V</strong></a>
</div>
<h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_dataset">append_dataset() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.append_metadata">append_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.augment_with_filenumber">augment_with_filenumber() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.augment_with_filetype">augment_with_filetype() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="C">C</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.convert_attrdict_to_np_structured_array">convert_attrdict_to_np_structured_array() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.convert_dataframe_to_np_structured_array">convert_dataframe_to_np_structured_array() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.convert_string_to_bytes">convert_string_to_bytes() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.copy_directory_with_contraints">copy_directory_with_contraints() (in module utils.g5505_utils)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/pipelines.html#pipelines.data_integration.copy_subtree_and_create_hdf5">copy_subtree_and_create_hdf5() (in module pipelines.data_integration)</a>
</li>
<li><a href="modules/pipelines.html#pipelines.metadata_revision.count">count() (in module pipelines.metadata_revision)</a>
</li>
<li><a href="modules/src.html#src.hdf5_writer.create_hdf5_file_from_dataframe">create_hdf5_file_from_dataframe() (in module src.hdf5_writer)</a>
</li>
<li><a href="modules/src.html#src.hdf5_writer.create_hdf5_file_from_filesystem_path">create_hdf5_file_from_filesystem_path() (in module src.hdf5_writer)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.created_at">created_at() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="D">D</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.delete_metadata">delete_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_vis.display_group_hierarchy_on_a_treemap">display_group_hierarchy_on_a_treemap() (in module src.hdf5_vis)</a>
</li>
</ul></td>
</tr></table>
<h2 id="E">E</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata">extract_and_load_dataset_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe">extract_dataset_as_dataframe() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="G">G</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.get_groups_at_a_level">get_groups_at_a_level() (in module src.hdf5_ops)</a>
</li>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.get_metadata">get_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.get_parent_child_relationships">get_parent_child_relationships() (in module src.hdf5_ops)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.group_by_df_column">group_by_df_column() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="H">H</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager">HDF5DataOpsManager (class in src.hdf5_ops)</a>
</li>
</ul></td>
</tr></table>
<h2 id="I">I</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.infer_units">infer_units() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.is_callable_list">is_callable_list() (in module utils.g5505_utils)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.is_str_list">is_str_list() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.is_structured_array">is_structured_array() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="L">L</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/pipelines.html#pipelines.data_integration.load_config_and_setup_logging">load_config_and_setup_logging() (in module pipelines.data_integration)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.load_file_obj">load_file_obj() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li><a href="modules/pipelines.html#pipelines.metadata_revision.load_yaml">load_yaml() (in module pipelines.metadata_revision)</a>
</li>
</ul></td>
</tr></table>
<h2 id="M">M</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.make_file_copy">make_file_copy() (in module utils.g5505_utils)</a>
</li>
<li>
module
<ul>
<li><a href="modules/notebooks.html#module-notebooks">notebooks</a>
</li>
<li><a href="modules/pipelines.html#module-pipelines.data_integration">pipelines.data_integration</a>
</li>
<li><a href="modules/pipelines.html#module-pipelines.metadata_revision">pipelines.metadata_revision</a>
</li>
<li><a href="modules/src.html#module-src.hdf5_ops">src.hdf5_ops</a>
</li>
<li><a href="modules/src.html#module-src.hdf5_vis">src.hdf5_vis</a>
</li>
<li><a href="modules/src.html#module-src.hdf5_writer">src.hdf5_writer</a>
</li>
<li><a href="modules/utils.html#module-utils.g5505_utils">utils.g5505_utils</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="N">N</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
notebooks
<ul>
<li><a href="modules/notebooks.html#module-notebooks">module</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="P">P</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
pipelines.data_integration
<ul>
<li><a href="modules/pipelines.html#module-pipelines.data_integration">module</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
pipelines.metadata_revision
<ul>
<li><a href="modules/pipelines.html#module-pipelines.metadata_revision">module</a>
</li>
</ul></li>
<li><a href="modules/utils.html#utils.g5505_utils.progressBar">progressBar() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="R">R</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.read_mtable_as_dataframe">read_mtable_as_dataframe() (in module src.hdf5_ops)</a>
</li>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column">reformat_datetime_column() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.rename_metadata">rename_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li><a href="modules/pipelines.html#pipelines.data_integration.run_pipeline">run_pipeline() (in module pipelines.data_integration)</a>
</li>
</ul></td>
</tr></table>
<h2 id="S">S</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.sanitize_dataframe">sanitize_dataframe() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/src.html#src.hdf5_writer.save_processed_dataframe_to_hdf5">save_processed_dataframe_to_hdf5() (in module src.hdf5_writer)</a>
</li>
<li><a href="modules/src.html#src.hdf5_ops.serialize_metadata">serialize_metadata() (in module src.hdf5_ops)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.setup_logging">setup_logging() (in module utils.g5505_utils)</a>
</li>
<li><a href="modules/utils.html#utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols">split_sample_col_into_sample_and_data_quality_cols() (in module utils.g5505_utils)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
src.hdf5_ops
<ul>
<li><a href="modules/src.html#module-src.hdf5_ops">module</a>
</li>
</ul></li>
<li>
src.hdf5_vis
<ul>
<li><a href="modules/src.html#module-src.hdf5_vis">module</a>
</li>
</ul></li>
<li>
src.hdf5_writer
<ul>
<li><a href="modules/src.html#module-src.hdf5_writer">module</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="T">T</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/utils.html#utils.g5505_utils.to_serializable_dtype">to_serializable_dtype() (in module utils.g5505_utils)</a>
</li>
</ul></td>
</tr></table>
<h2 id="U">U</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj">unload_file_obj() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_file">update_file() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li><a href="modules/pipelines.html#pipelines.metadata_revision.update_hdf5_file_with_review">update_hdf5_file_with_review() (in module pipelines.metadata_revision)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager.update_metadata">update_metadata() (src.hdf5_ops.HDF5DataOpsManager method)</a>
</li>
<li>
utils.g5505_utils
<ul>
<li><a href="modules/utils.html#module-utils.g5505_utils">module</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="V">V</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="modules/pipelines.html#pipelines.metadata_revision.validate_yaml_dict">validate_yaml_dict() (in module pipelines.metadata_revision)</a>
</li>
</ul></td>
</tr></table>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

178
docs/build/html/index.html vendored Normal file
View File

@@ -0,0 +1,178 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Welcome to DIMA&#8217;s documentation! &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=8d563738"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="HDF5 Data Operations" href="modules/src.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="#" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="#">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="#" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Welcome to DIMA&#8217;s documentation!</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/index.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="welcome-to-dima-s-documentation">
<h1>Welcome to DIMA&#8217;s documentation!<a class="headerlink" href="#welcome-to-dima-s-documentation" title="Link to this heading"></a></h1>
<div class="toctree-wrapper compound">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html">HDF5 Data Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_ops.HDF5DataOpsManager"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_ops.get_groups_at_a_level"><code class="docutils literal notranslate"><span class="pre">get_groups_at_a_level()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_ops.get_parent_child_relationships"><code class="docutils literal notranslate"><span class="pre">get_parent_child_relationships()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_ops.read_mtable_as_dataframe"><code class="docutils literal notranslate"><span class="pre">read_mtable_as_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_ops.serialize_metadata"><code class="docutils literal notranslate"><span class="pre">serialize_metadata()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_writer">HDF5 Writer</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_writer.create_hdf5_file_from_dataframe"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_writer.create_hdf5_file_from_filesystem_path"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_filesystem_path()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_writer.save_processed_dataframe_to_hdf5"><code class="docutils literal notranslate"><span class="pre">save_processed_dataframe_to_hdf5()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_vis">Data Visualization</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/src.html#src.hdf5_vis.display_group_hierarchy_on_a_treemap"><code class="docutils literal notranslate"><span class="pre">display_group_hierarchy_on_a_treemap()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/pipelines.html">Pipelines and workflows</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.data_integration.copy_subtree_and_create_hdf5"><code class="docutils literal notranslate"><span class="pre">copy_subtree_and_create_hdf5()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.data_integration.load_config_and_setup_logging"><code class="docutils literal notranslate"><span class="pre">load_config_and_setup_logging()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.data_integration.run_pipeline"><code class="docutils literal notranslate"><span class="pre">run_pipeline()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.metadata_revision.count"><code class="docutils literal notranslate"><span class="pre">count()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.metadata_revision.load_yaml"><code class="docutils literal notranslate"><span class="pre">load_yaml()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.metadata_revision.update_hdf5_file_with_review"><code class="docutils literal notranslate"><span class="pre">update_hdf5_file_with_review()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/pipelines.html#pipelines.metadata_revision.validate_yaml_dict"><code class="docutils literal notranslate"><span class="pre">validate_yaml_dict()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/utils.html">Data Structure Conversion</a><ul>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.augment_with_filenumber"><code class="docutils literal notranslate"><span class="pre">augment_with_filenumber()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.augment_with_filetype"><code class="docutils literal notranslate"><span class="pre">augment_with_filetype()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.convert_attrdict_to_np_structured_array"><code class="docutils literal notranslate"><span class="pre">convert_attrdict_to_np_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.convert_dataframe_to_np_structured_array"><code class="docutils literal notranslate"><span class="pre">convert_dataframe_to_np_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.convert_string_to_bytes"><code class="docutils literal notranslate"><span class="pre">convert_string_to_bytes()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.copy_directory_with_contraints"><code class="docutils literal notranslate"><span class="pre">copy_directory_with_contraints()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.created_at"><code class="docutils literal notranslate"><span class="pre">created_at()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.group_by_df_column"><code class="docutils literal notranslate"><span class="pre">group_by_df_column()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.infer_units"><code class="docutils literal notranslate"><span class="pre">infer_units()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.is_callable_list"><code class="docutils literal notranslate"><span class="pre">is_callable_list()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.is_str_list"><code class="docutils literal notranslate"><span class="pre">is_str_list()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.is_structured_array"><code class="docutils literal notranslate"><span class="pre">is_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.make_file_copy"><code class="docutils literal notranslate"><span class="pre">make_file_copy()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.progressBar"><code class="docutils literal notranslate"><span class="pre">progressBar()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.sanitize_dataframe"><code class="docutils literal notranslate"><span class="pre">sanitize_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.setup_logging"><code class="docutils literal notranslate"><span class="pre">setup_logging()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols"><code class="docutils literal notranslate"><span class="pre">split_sample_col_into_sample_and_data_quality_cols()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="modules/utils.html#utils.g5505_utils.to_serializable_dtype"><code class="docutils literal notranslate"><span class="pre">to_serializable_dtype()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</section>
<section id="indices-and-tables">
<h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Link to this heading"></a></h1>
<ul class="simple">
<li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
<li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
<li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
</ul>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="modules/src.html" class="btn btn-neutral float-right" title="HDF5 Data Operations" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

109
docs/build/html/modules/instruments.html vendored Normal file
View File

@@ -0,0 +1,109 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>&lt;no title&gt; &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#data-integration-with-hdf5">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-pipelines.data_integration">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-utils.g5505_utils">Utilities</a></li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">&lt;no title&gt;</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/instruments.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

111
docs/build/html/modules/notebooks.html vendored Normal file
View File

@@ -0,0 +1,111 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Notebooks &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Notebooks</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/notebooks.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-notebooks">
<span id="notebooks"></span><h1>Notebooks<a class="headerlink" href="#module-notebooks" title="Link to this heading"></a></h1>
</section>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

194
docs/build/html/modules/pipelines.html vendored Normal file
View File

@@ -0,0 +1,194 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Pipelines and workflows &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Utilities" href="utils.html" />
<link rel="prev" title="HDF5 data operations" href="src.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Pipelines and workflows</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.data_integration.copy_subtree_and_create_hdf5"><code class="docutils literal notranslate"><span class="pre">copy_subtree_and_create_hdf5()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.data_integration.load_config_and_setup_logging"><code class="docutils literal notranslate"><span class="pre">load_config_and_setup_logging()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.data_integration.run_pipeline"><code class="docutils literal notranslate"><span class="pre">run_pipeline()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.metadata_revision.count"><code class="docutils literal notranslate"><span class="pre">count()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.metadata_revision.load_yaml"><code class="docutils literal notranslate"><span class="pre">load_yaml()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.metadata_revision.update_hdf5_file_with_review"><code class="docutils literal notranslate"><span class="pre">update_hdf5_file_with_review()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#pipelines.metadata_revision.validate_yaml_dict"><code class="docutils literal notranslate"><span class="pre">validate_yaml_dict()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Pipelines and workflows</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/pipelines.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-pipelines.data_integration">
<span id="pipelines-and-workflows"></span><h1>Pipelines and workflows<a class="headerlink" href="#module-pipelines.data_integration" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.data_integration.copy_subtree_and_create_hdf5">
<span class="sig-prename descclassname"><span class="pre">pipelines.data_integration.</span></span><span class="sig-name descname"><span class="pre">copy_subtree_and_create_hdf5</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">src</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dst</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_dir_keywords</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_file_keywords</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">allowed_file_extensions</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">root_metadata_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/data_integration.html#copy_subtree_and_create_hdf5"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.data_integration.copy_subtree_and_create_hdf5" title="Link to this definition"></a></dt>
<dd><p>Helper function to copy directory with constraints and create HDF5.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.data_integration.load_config_and_setup_logging">
<span class="sig-prename descclassname"><span class="pre">pipelines.data_integration.</span></span><span class="sig-name descname"><span class="pre">load_config_and_setup_logging</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">yaml_config_file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">log_dir</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/data_integration.html#load_config_and_setup_logging"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.data_integration.load_config_and_setup_logging" title="Link to this definition"></a></dt>
<dd><p>Load YAML configuration file, set up logging, and validate required keys and datetime_steps.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.data_integration.run_pipeline">
<span class="sig-prename descclassname"><span class="pre">pipelines.data_integration.</span></span><span class="sig-name descname"><span class="pre">run_pipeline</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_config_yamlFile</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">log_dir</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'logs/'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/data_integration.html#run_pipeline"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.data_integration.run_pipeline" title="Link to this definition"></a></dt>
<dd><p>Integrates data sources specified by the input configuration file into HDF5 files.</p>
<dl class="simple">
<dt>Parameters:</dt><dd><p>path_to_config_yamlFile (str): Path to the YAML configuration file.
log_dir (str): Directory to save the log file.</p>
</dd>
<dt>Returns:</dt><dd><p>list: List of Paths to the created HDF5 file(s).</p>
</dd>
</dl>
</dd></dl>
<dl class="py function" id="module-pipelines.metadata_revision">
<dt class="sig sig-object py" id="pipelines.metadata_revision.count">
<span class="sig-prename descclassname"><span class="pre">pipelines.metadata_revision.</span></span><span class="sig-name descname"><span class="pre">count</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">hdf5_obj</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">yml_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/metadata_revision.html#count"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.metadata_revision.count" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.metadata_revision.load_yaml">
<span class="sig-prename descclassname"><span class="pre">pipelines.metadata_revision.</span></span><span class="sig-name descname"><span class="pre">load_yaml</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">review_yaml_file</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/metadata_revision.html#load_yaml"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.metadata_revision.load_yaml" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.metadata_revision.update_hdf5_file_with_review">
<span class="sig-prename descclassname"><span class="pre">pipelines.metadata_revision.</span></span><span class="sig-name descname"><span class="pre">update_hdf5_file_with_review</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_hdf5_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">review_yaml_file</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/metadata_revision.html#update_hdf5_file_with_review"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.metadata_revision.update_hdf5_file_with_review" title="Link to this definition"></a></dt>
<dd><p>Updates, appends, or deletes metadata attributes in an HDF5 file based on a provided YAML dictionary.</p>
<section id="parameters">
<h2>Parameters:<a class="headerlink" href="#parameters" title="Link to this heading"></a></h2>
<dl>
<dt>input_hdf5_file<span class="classifier">str</span></dt><dd><p>Path to the HDF5 file.</p>
</dd>
<dt>yaml_dict<span class="classifier">dict</span></dt><dd><p>Dictionary specifying objects and their attributes with operations. Example format:</p>
<pre>{
    "object_name": {
        "attributes": {
            "attr_name": {
                "value": attr_value,
                "delete": true | false
            }
        }
    }
}</pre>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="pipelines.metadata_revision.validate_yaml_dict">
<span class="sig-prename descclassname"><span class="pre">pipelines.metadata_revision.</span></span><span class="sig-name descname"><span class="pre">validate_yaml_dict</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_hdf5_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">yaml_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/pipelines/metadata_revision.html#validate_yaml_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#pipelines.metadata_revision.validate_yaml_dict" title="Link to this definition"></a></dt>
<dd></dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="src.html" class="btn btn-neutral float-left" title="HDF5 data operations" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="utils.html" class="btn btn-neutral float-right" title="Utilities" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

472
docs/build/html/modules/src.html vendored Normal file
View File

@@ -0,0 +1,472 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>HDF5 Data Operations &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Pipelines and workflows" href="pipelines.html" />
<link rel="prev" title="Welcome to DIMA’s documentation!" href="../index.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="current reference internal" href="#">HDF5 Data Operations</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager</span></code></a><ul>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.append_dataset"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.append_dataset()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.append_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.append_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.delete_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.delete_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.extract_and_load_dataset_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.extract_dataset_as_dataframe()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.get_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.get_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.load_file_obj"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.load_file_obj()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.reformat_datetime_column()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.rename_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.rename_metadata()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.unload_file_obj()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.update_file"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.update_file()</span></code></a></li>
<li class="toctree-l3"><a class="reference internal" href="#src.hdf5_ops.HDF5DataOpsManager.update_metadata"><code class="docutils literal notranslate"><span class="pre">HDF5DataOpsManager.update_metadata()</span></code></a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.get_groups_at_a_level"><code class="docutils literal notranslate"><span class="pre">get_groups_at_a_level()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.get_parent_child_relationships"><code class="docutils literal notranslate"><span class="pre">get_parent_child_relationships()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.read_mtable_as_dataframe"><code class="docutils literal notranslate"><span class="pre">read_mtable_as_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_ops.serialize_metadata"><code class="docutils literal notranslate"><span class="pre">serialize_metadata()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="#module-src.hdf5_writer">HDF5 Writer</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.create_hdf5_file_from_dataframe"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.create_hdf5_file_from_filesystem_path"><code class="docutils literal notranslate"><span class="pre">create_hdf5_file_from_filesystem_path()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_writer.save_processed_dataframe_to_hdf5"><code class="docutils literal notranslate"><span class="pre">save_processed_dataframe_to_hdf5()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="#module-src.hdf5_vis">Data Visualization</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#src.hdf5_vis.display_group_hierarchy_on_a_treemap"><code class="docutils literal notranslate"><span class="pre">display_group_hierarchy_on_a_treemap()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">HDF5 Data Operations</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/src.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-src.hdf5_ops">
<span id="hdf5-data-operations"></span><h1>HDF5 Data Operations<a class="headerlink" href="#module-src.hdf5_ops" title="Link to this heading"></a></h1>
<dl class="py class">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">HDF5DataOpsManager</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'r+'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager" title="Link to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<p>A class that handles fundamental mid-level HDF5 file operations to power data updates, metadata revision, and data analysis
with HDF5 files encoding multi-instrument experimental campaign data.</p>
<section id="parameters">
<h2>Parameters:<a class="headerlink" href="#parameters" title="Link to this heading"></a></h2>
<blockquote>
<div><dl class="simple">
<dt>file_path<span class="classifier">str</span></dt><dd><p>Path to the HDF5 file (e.g., path/to/hdf5file).</p>
</dd>
<dt>mode<span class="classifier">str</span></dt><dd><p>Either ‘r’ (read-only) or ‘r+’ (read/write). Both modes require the file to already exist.</p>
</dd>
</dl>
</div></blockquote>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.append_dataset">
<span class="sig-name descname"><span class="pre">append_dataset</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_dict</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.append_dataset"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.append_dataset" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.append_metadata">
<span class="sig-name descname"><span class="pre">append_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.append_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.append_metadata" title="Link to this definition"></a></dt>
<dd><p>Appends metadata attributes to the specified object (obj_name) based on the provided annotation_dict.</p>
<p>This method ensures that the provided metadata attributes do not overwrite any existing ones. If an attribute already exists,
a ValueError is raised. The function supports storing scalar values (int, float, str) and compound values such as dictionaries
that are converted into NumPy structured arrays before being added to the metadata.</p>
<section id="id1">
<h3>Parameters:<a class="headerlink" href="#id1" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><dl>
<dt>A dictionary where the keys represent new attribute names (strings), and the values can be:</dt><dd><ul class="simple">
<li><p>Scalars: int, float, or str.</p></li>
<li><p>Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays.</p></li>
</ul>
<p>Example of a compound value:</p>
<dl>
<dt>annotation_dict = {</dt><dd><dl class="simple">
<dt>“relative_humidity”: {</dt><dd><p>“value”: 65,
“units”: “percentage”,
“range”: “[0,100]”,
“definition”: “amount of water vapor present …”</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.delete_metadata">
<span class="sig-name descname"><span class="pre">delete_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.delete_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.delete_metadata" title="Link to this definition"></a></dt>
<dd><p>Deletes metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</p>
<section id="id2">
<h3>Parameters:<a class="headerlink" href="#id2" title="Link to this heading"></a></h3>
<dl class="simple">
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><p>Dictionary where keys represent attribute names, and values should be dictionaries containing
{“delete”: True} to mark them for deletion.</p>
</dd>
</dl>
</section>
<section id="example">
<h3>Example:<a class="headerlink" href="#example" title="Link to this heading"></a></h3>
<p>annotation_dict = {“attr_to_be_deleted”: {“delete”: True}}</p>
</section>
<section id="behavior">
<h3>Behavior:<a class="headerlink" href="#behavior" title="Link to this heading"></a></h3>
<ul class="simple">
<li><p>Deletes the specified attributes from the object’s metadata if marked for deletion.</p></li>
<li><p>Issues a warning if the attribute is not found or not marked for deletion.</p></li>
</ul>
</section>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata">
<span class="sig-name descname"><span class="pre">extract_and_load_dataset_metadata</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.extract_and_load_dataset_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.extract_and_load_dataset_metadata" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe">
<span class="sig-name descname"><span class="pre">extract_dataset_as_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.extract_dataset_as_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.extract_dataset_as_dataframe" title="Link to this definition"></a></dt>
<dd><p>Returns a copy of the dataset content as a DataFrame when possible, or as a NumPy array otherwise.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.get_metadata">
<span class="sig-name descname"><span class="pre">get_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_path</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.get_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.get_metadata" title="Link to this definition"></a></dt>
<dd><p>Get file attributes from object at path = obj_path. For example,
obj_path = ‘/’ will get root level attributes or metadata.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.load_file_obj">
<span class="sig-name descname"><span class="pre">load_file_obj</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.load_file_obj"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.load_file_obj" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column">
<span class="sig-name descname"><span class="pre">reformat_datetime_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">dataset_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">src_format</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">desired_format</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'%Y-%m-%d</span> <span class="pre">%H:%M:%S.%f'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.reformat_datetime_column"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.reformat_datetime_column" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.rename_metadata">
<span class="sig-name descname"><span class="pre">rename_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">renaming_map</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.rename_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.rename_metadata" title="Link to this definition"></a></dt>
<dd><p>Renames metadata attributes of the specified object (obj_name) based on the provided renaming_map.</p>
<section id="id3">
<h3>Parameters:<a class="headerlink" href="#id3" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name: str</dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>renaming_map: dict</dt><dd><p>A dictionary where keys are current attribute names (strings), and values are the new attribute names (strings or byte strings) to rename to.</p>
<dl class="simple">
<dt>renaming_map = {</dt><dd><p>“old_attr_name”: “new_attr_name”,
“old_attr_2”: “new_attr_2”</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.unload_file_obj">
<span class="sig-name descname"><span class="pre">unload_file_obj</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.unload_file_obj"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.unload_file_obj" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.update_file">
<span class="sig-name descname"><span class="pre">update_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_append_dir</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.update_file"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.update_file" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="src.hdf5_ops.HDF5DataOpsManager.update_metadata">
<span class="sig-name descname"><span class="pre">update_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj_name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotation_dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#HDF5DataOpsManager.update_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.HDF5DataOpsManager.update_metadata" title="Link to this definition"></a></dt>
<dd><p>Updates the value of existing metadata attributes of the specified object (obj_name) based on the provided annotation_dict.</p>
<p>The function disregards non-existing attributes and suggests using the append_metadata() method to add them to the metadata.</p>
<section id="id4">
<h3>Parameters:<a class="headerlink" href="#id4" title="Link to this heading"></a></h3>
<dl>
<dt>obj_name<span class="classifier">str</span></dt><dd><p>Path to the target object (dataset or group) within the HDF5 file.</p>
</dd>
<dt>annotation_dict: dict</dt><dd><dl>
<dt>A dictionary where the keys represent existing attribute names (strings), and the values can be:</dt><dd><ul class="simple">
<li><p>Scalars: int, float, or str.</p></li>
<li><p>Compound values (dictionaries) for more complex metadata, which are converted to NumPy structured arrays.</p></li>
</ul>
<p>Example of a compound value:</p>
<dl>
<dt>annotation_dict = {</dt><dd><dl class="simple">
<dt>“relative_humidity”: {</dt><dd><p>“value”: 65,
“units”: “percentage”,
“range”: “[0,100]”,
“definition”: “amount of water vapor present …”</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
<p>}</p>
</dd>
</dl>
</dd>
</dl>
</section>
</dd></dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.get_groups_at_a_level">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">get_groups_at_a_level</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">File</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">level</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#get_groups_at_a_level"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.get_groups_at_a_level" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.get_parent_child_relationships">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">get_parent_child_relationships</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">File</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#get_parent_child_relationships"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.get_parent_child_relationships" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.read_mtable_as_dataframe">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">read_mtable_as_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_ops.html#read_mtable_as_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.read_mtable_as_dataframe" title="Link to this definition"></a></dt>
<dd><p>Reconstruct a MATLAB Table encoded in a .h5 file as a Pandas DataFrame.</p>
<p>This function reads a .h5 file containing a MATLAB Table and reconstructs it as a Pandas DataFrame.
The input .h5 file contains one group per row of the MATLAB Table. Each group stores the table’s
dataset-like variables as Datasets, while categorical and numerical variables are represented as
attributes of the respective group.</p>
<p>To ensure homogeneity of data columns, the DataFrame is constructed column-wise.</p>
<section id="id5">
<h2>Parameters<a class="headerlink" href="#id5" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>filename<span class="classifier">str</span></dt><dd><p>The name of the .h5 file. This may include the file’s location and path information.</p>
</dd>
</dl>
</section>
<section id="returns">
<h2>Returns<a class="headerlink" href="#returns" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>pd.DataFrame</dt><dd><p>The MATLAB Table reconstructed as a Pandas DataFrame.</p>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_ops.serialize_metadata">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_ops.</span></span><span class="sig-name descname"><span class="pre">serialize_metadata</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_filename_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">folder_depth</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">4</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_format</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'yaml'</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">str</span></span></span><a class="reference internal" href="../_modules/src/hdf5_ops.html#serialize_metadata"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_ops.serialize_metadata" title="Link to this definition"></a></dt>
<dd><p>Serialize metadata from an HDF5 file into YAML or JSON format.</p>
<section id="id6">
<h2>Parameters<a class="headerlink" href="#id6" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>input_filename_path<span class="classifier">str</span></dt><dd><p>The path to the input HDF5 file.</p>
</dd>
<dt>folder_depth<span class="classifier">int, optional</span></dt><dd><p>The folder depth to control how much of the HDF5 file hierarchy is traversed (default is 4).</p>
</dd>
<dt>output_format<span class="classifier">str, optional</span></dt><dd><p>The format to serialize the output, either ‘yaml’ or ‘json’ (default is ‘yaml’).</p>
</dd>
</dl>
</section>
<section id="id7">
<h2>Returns<a class="headerlink" href="#id7" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>str</dt><dd><p>The output file path where the serialized metadata is stored (either .yaml or .json).</p>
</dd>
</dl>
</section>
</dd></dl>
</section>
<section id="module-src.hdf5_writer">
<span id="hdf5-writer"></span><h1>HDF5 Writer<a class="headerlink" href="#module-src.hdf5_writer" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.create_hdf5_file_from_dataframe">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">create_hdf5_file_from_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ofilename</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">input_data</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_by_funcs</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">approach</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extract_attrs_func</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#create_hdf5_file_from_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_writer.create_hdf5_file_from_dataframe" title="Link to this definition"></a></dt>
<dd><p>Creates an HDF5 file with hierarchical groups based on the specified grouping functions or columns.</p>
<section id="id8">
<h2>Parameters:<a class="headerlink" href="#id8" title="Link to this heading"></a></h2>
<blockquote>
<div><p>ofilename (str): Path for the output HDF5 file.
input_data (pd.DataFrame or str): Input data as a DataFrame or a valid file system path.
group_by_funcs (list): List of callables or column names to define hierarchical grouping.
approach (str): Specifies the approach (‘top-down’ or ‘bottom-up’) for creating the HDF5 file.
extract_attrs_func (callable, optional): Function to extract additional attributes for HDF5 groups.</p>
</div></blockquote>
</section>
<section id="id9">
<h2>Returns:<a class="headerlink" href="#id9" title="Link to this heading"></a></h2>
<blockquote>
<div><p>None</p>
</div></blockquote>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.create_hdf5_file_from_filesystem_path">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">create_hdf5_file_from_filesystem_path</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path_to_input_directory</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path_to_filenames_dict</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_dir_keywords</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">[]</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">root_metadata_dict</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">{}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'w'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#create_hdf5_file_from_filesystem_path"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a 
class="headerlink" href="#src.hdf5_writer.create_hdf5_file_from_filesystem_path" title="Link to this definition"></a></dt>
<dd><p>Creates an .h5 file with name “output_filename” that preserves the directory tree (or folder structure)
of a given filesystem path.</p>
<p>The data integration capabilities are limited by our file reader, which can only access data from a list of
admissible file formats. These, however, can be extended. Directories are groups in the resulting HDF5 file.
Files are formatted as composite objects consisting of a group, file, and attributes.</p>
<section id="id10">
<h2>Parameters<a class="headerlink" href="#id10" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>output_filename<span class="classifier">str</span></dt><dd><p>Name of the output HDF5 file.</p>
</dd>
<dt>path_to_input_directory<span class="classifier">str</span></dt><dd><p>Path to root directory, specified with forward slashes, e.g., path/to/root.</p>
</dd>
<dt>path_to_filenames_dict<span class="classifier">dict, optional</span></dt><dd><p>A pre-processed dictionary where keys are directory paths on the input directory's tree and values are lists of files.
If provided, input_file_system_path is ignored.</p>
</dd>
<dt>select_dir_keywords<span class="classifier">list</span></dt><dd><p>List of string elements used to select only directory paths that contain
a word in select_dir_keywords. When empty, all directory paths are considered
to be included in the HDF5 file group hierarchy.</p>
</dd>
<dt>root_metadata_dict<span class="classifier">dict</span></dt><dd><p>Metadata to include at the root level of the HDF5 file.</p>
</dd>
<dt>mode<span class="classifier">str</span></dt><dd><p>'w' creates the file, truncating it if it exists; 'r+' opens it for read/write, and the file must exist. By default, mode = “w”.</p>
</dd>
</dl>
</section>
<section id="id11">
<h2>Returns<a class="headerlink" href="#id11" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>output_filename<span class="classifier">str</span></dt><dd><p>Path to the created HDF5 file.</p>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_writer.save_processed_dataframe_to_hdf5">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_writer.</span></span><span class="sig-name descname"><span class="pre">save_processed_dataframe_to_hdf5</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">annotator</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_writer.html#save_processed_dataframe_to_hdf5"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_writer.save_processed_dataframe_to_hdf5" title="Link to this definition"></a></dt>
<dd><p>Save processed dataframe columns with annotations to an HDF5 file.</p>
<dl class="simple">
<dt>Parameters:</dt><dd><p>df (pd.DataFrame): DataFrame containing processed time series.
annotator (): Annotator object with get_metadata method.
output_filename (str): Path to the source HDF5 file.</p>
</dd>
</dl>
</dd></dl>
</section>
<section id="module-src.hdf5_vis">
<span id="data-visualization"></span><h1>Data Visualization<a class="headerlink" href="#module-src.hdf5_vis" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="src.hdf5_vis.display_group_hierarchy_on_a_treemap">
<span class="sig-prename descclassname"><span class="pre">src.hdf5_vis.</span></span><span class="sig-name descname"><span class="pre">display_group_hierarchy_on_a_treemap</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/src/hdf5_vis.html#display_group_hierarchy_on_a_treemap"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#src.hdf5_vis.display_group_hierarchy_on_a_treemap" title="Link to this definition"></a></dt>
<dd><p>filename (str): HDF5 file's filename.</p>
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="../index.html" class="btn btn-neutral float-left" title="Welcome to DIMA's documentation!" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="pipelines.html" class="btn btn-neutral float-right" title="Pipelines and workflows" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

304
docs/build/html/modules/utils.html vendored Normal file
View File

@@ -0,0 +1,304 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Data Structure Conversion &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Notebooks" href="notebooks.html" />
<link rel="prev" title="Pipelines and workflows" href="pipelines.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Data Structure Conversion</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.augment_with_filenumber"><code class="docutils literal notranslate"><span class="pre">augment_with_filenumber()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.augment_with_filetype"><code class="docutils literal notranslate"><span class="pre">augment_with_filetype()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.convert_attrdict_to_np_structured_array"><code class="docutils literal notranslate"><span class="pre">convert_attrdict_to_np_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.convert_dataframe_to_np_structured_array"><code class="docutils literal notranslate"><span class="pre">convert_dataframe_to_np_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.convert_string_to_bytes"><code class="docutils literal notranslate"><span class="pre">convert_string_to_bytes()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.copy_directory_with_contraints"><code class="docutils literal notranslate"><span class="pre">copy_directory_with_contraints()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.created_at"><code class="docutils literal notranslate"><span class="pre">created_at()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.group_by_df_column"><code class="docutils literal notranslate"><span class="pre">group_by_df_column()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.infer_units"><code class="docutils literal notranslate"><span class="pre">infer_units()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.is_callable_list"><code class="docutils literal notranslate"><span class="pre">is_callable_list()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.is_str_list"><code class="docutils literal notranslate"><span class="pre">is_str_list()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.is_structured_array"><code class="docutils literal notranslate"><span class="pre">is_structured_array()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.make_file_copy"><code class="docutils literal notranslate"><span class="pre">make_file_copy()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.progressBar"><code class="docutils literal notranslate"><span class="pre">progressBar()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.sanitize_dataframe"><code class="docutils literal notranslate"><span class="pre">sanitize_dataframe()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.setup_logging"><code class="docutils literal notranslate"><span class="pre">setup_logging()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols"><code class="docutils literal notranslate"><span class="pre">split_sample_col_into_sample_and_data_quality_cols()</span></code></a></li>
<li class="toctree-l2"><a class="reference internal" href="#utils.g5505_utils.to_serializable_dtype"><code class="docutils literal notranslate"><span class="pre">to_serializable_dtype()</span></code></a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Data Structure Conversion</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/modules/utils.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="module-utils.g5505_utils">
<span id="data-structure-conversion"></span><h1>Data Structure Conversion<a class="headerlink" href="#module-utils.g5505_utils" title="Link to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.augment_with_filenumber">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">augment_with_filenumber</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#augment_with_filenumber"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.augment_with_filenumber" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.augment_with_filetype">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">augment_with_filetype</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#augment_with_filetype"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.augment_with_filetype" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.convert_attrdict_to_np_structured_array">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">convert_attrdict_to_np_structured_array</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">attr_value</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">dict</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#convert_attrdict_to_np_structured_array"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.convert_attrdict_to_np_structured_array" title="Link to this definition"></a></dt>
<dd><p>Converts a dictionary of attributes into a numpy structured array for HDF5
compound type compatibility.</p>
<p>Each dictionary key is mapped to a field in the structured array, with the
data type (S) determined by the longest string representation of the values.
If the dictionary is empty, the function returns the string 'missing'.</p>
<section id="parameters">
<h2>Parameters<a class="headerlink" href="#parameters" title="Link to this heading"></a></h2>
<dl>
<dt>attr_value<span class="classifier">dict</span></dt><dd><p>Dictionary containing the attributes to be converted. Example:
attr_value = {</p>
<blockquote>
<div><p>'name': 'Temperature',
'unit': 'Celsius',
'value': 23.5,
'timestamp': '2023-09-26 10:00'</p>
</div></blockquote>
<p>}</p>
</dd>
</dl>
</section>
<section id="returns">
<h2>Returns<a class="headerlink" href="#returns" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>new_attr_value<span class="classifier">ndarray or str</span></dt><dd><p>Numpy structured array with UTF-8 encoded fields. Returns the string 'missing' if
the input dictionary is empty.</p>
</dd>
</dl>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.convert_dataframe_to_np_structured_array">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">convert_dataframe_to_np_structured_array</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#convert_dataframe_to_np_structured_array"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.convert_dataframe_to_np_structured_array" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.convert_string_to_bytes">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">convert_string_to_bytes</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_list</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#convert_string_to_bytes"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.convert_string_to_bytes" title="Link to this definition"></a></dt>
<dd><p>Convert a list of strings into a numpy array with utf8-type entries.</p>
<section id="id1">
<h2>Parameters<a class="headerlink" href="#id1" title="Link to this heading"></a></h2>
<p>input_list (list) : list of string objects</p>
</section>
<section id="id2">
<h2>Returns<a class="headerlink" href="#id2" title="Link to this heading"></a></h2>
<p>input_array_bytes (ndarray): array of utf8-type entries.</p>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.copy_directory_with_contraints">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">copy_directory_with_contraints</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_dir_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_dir_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_dir_keywords</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">select_file_keywords</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">allowed_file_extensions</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">dry_run</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#copy_directory_with_contraints"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.copy_directory_with_contraints" title="Link to this definition"></a></dt>
<dd><p>Copies files from input_dir_path to output_dir_path based on specified constraints.</p>
<section id="id3">
<h2>Parameters<a class="headerlink" href="#id3" title="Link to this heading"></a></h2>
<blockquote>
<div><p>input_dir_path (str): Path to the input directory.
output_dir_path (str): Path to the output directory.
select_dir_keywords (list): optional, List of keywords for selecting directories.
select_file_keywords (list): optional, List of keywords for selecting files.
allowed_file_extensions (list): optional, List of allowed file extensions.</p>
</div></blockquote>
</section>
<section id="id4">
<h2>Returns<a class="headerlink" href="#id4" title="Link to this heading"></a></h2>
<blockquote>
<div><p>path_to_files_dict (dict): dictionary mapping directory paths to lists of copied file names satisfying the constraints.</p>
</div></blockquote>
</section>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.created_at">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">created_at</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">datetime_format</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'%Y-%m-%d</span> <span class="pre">%H:%M:%S'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#created_at"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.created_at" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.group_by_df_column">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">group_by_df_column</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">column_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#group_by_df_column"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.group_by_df_column" title="Link to this definition"></a></dt>
<dd><p>df (pandas.DataFrame): DataFrame to be grouped.
column_name (str): Name of the column of df by which the grouping operation will take place.</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.infer_units">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">infer_units</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">column_name</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#infer_units"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.infer_units" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.is_callable_list">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">is_callable_list</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#is_callable_list"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.is_callable_list" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.is_str_list">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">is_str_list</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">x</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">list</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#is_str_list"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.is_str_list" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.is_structured_array">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">is_structured_array</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">attr_val</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#is_structured_array"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.is_structured_array" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.make_file_copy">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">make_file_copy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">source_file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_folder_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'tmp_files'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#make_file_copy"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.make_file_copy" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.progressBar">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">progressBar</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">count_value</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">total</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">suffix</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#progressBar"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.progressBar" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.sanitize_dataframe">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">sanitize_dataframe</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">df</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">DataFrame</span></span></span><a class="reference internal" href="../_modules/utils/g5505_utils.html#sanitize_dataframe"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.sanitize_dataframe" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.setup_logging">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">setup_logging</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">log_dir</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">log_filename</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#setup_logging"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.setup_logging" title="Link to this definition"></a></dt>
<dd><p>Sets up logging to a specified directory and file.</p>
<dl class="simple">
<dt>Parameters:</dt><dd><p>log_dir (str): Directory to save the log file.
log_filename (str): Name of the log file.</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">split_sample_col_into_sample_and_data_quality_cols</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">input_data</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">DataFrame</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#split_sample_col_into_sample_and_data_quality_cols"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.split_sample_col_into_sample_and_data_quality_cols" title="Link to this definition"></a></dt>
<dd></dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="utils.g5505_utils.to_serializable_dtype">
<span class="sig-prename descclassname"><span class="pre">utils.g5505_utils.</span></span><span class="sig-name descname"><span class="pre">to_serializable_dtype</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/utils/g5505_utils.html#to_serializable_dtype"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#utils.g5505_utils.to_serializable_dtype" title="Link to this definition"></a></dt>
<dd><p>Transform a value's dtype into a YAML/JSON-compatible dtype.</p>
<section id="id5">
<h2>Parameters<a class="headerlink" href="#id5" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>value<span class="classifier">_type_</span></dt><dd><p>_description_</p>
</dd>
</dl>
</section>
<section id="id6">
<h2>Returns<a class="headerlink" href="#id6" title="Link to this heading"></a></h2>
<dl class="simple">
<dt>_type_</dt><dd><p>_description_</p>
</dd>
</dl>
</section>
</dd></dl>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="pipelines.html" class="btn btn-neutral float-left" title="Pipelines and workflows" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="notebooks.html" class="btn btn-neutral float-right" title="Notebooks" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

View File

@@ -0,0 +1,111 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../">
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Tutorial workflows &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="../_static/jquery.js?v=5d32c60e"></script>
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="../_static/documentation_options.js?v=8d563738"></script>
<script src="../_static/doctools.js?v=9a2dae69"></script>
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html">HDF5 data operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html#module-src.hdf5_writer">Data integration with HDF5</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/src.html#module-src.hdf5_vis">Data visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="../modules/utils.html">Utilities</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Tutorial workflows</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/notebooks/workflow_di.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="tutorial-workflows">
<h1>Tutorial workflows<a class="headerlink" href="#tutorial-workflows" title="Link to this heading"></a></h1>
</section>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

BIN
docs/build/html/objects.inv vendored Normal file

Binary file not shown.

187
docs/build/html/py-modindex.html vendored Normal file
View File

@@ -0,0 +1,187 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Python Module Index &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=8d563738"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Python Module Index</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Python Module Index</h1>
<div class="modindex-jumpbox">
<a href="#cap-n"><strong>n</strong></a> |
<a href="#cap-p"><strong>p</strong></a> |
<a href="#cap-s"><strong>s</strong></a> |
<a href="#cap-u"><strong>u</strong></a>
</div>
<table class="indextable modindextable">
<tr class="pcap"><td></td><td>&#160;</td><td></td></tr>
<tr class="cap" id="cap-n"><td></td><td>
<strong>n</strong></td><td></td></tr>
<tr>
<td></td>
<td>
<a href="modules/notebooks.html#module-notebooks"><code class="xref">notebooks</code></a></td><td>
<em></em></td></tr>
<tr class="pcap"><td></td><td>&#160;</td><td></td></tr>
<tr class="cap" id="cap-p"><td></td><td>
<strong>p</strong></td><td></td></tr>
<tr>
<td><img src="_static/minus.png" class="toggler"
id="toggle-1" style="display: none" alt="-" /></td>
<td>
<code class="xref">pipelines</code></td><td>
<em></em></td></tr>
<tr class="cg-1">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/pipelines.html#module-pipelines.data_integration"><code class="xref">pipelines.data_integration</code></a></td><td>
<em></em></td></tr>
<tr class="cg-1">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/pipelines.html#module-pipelines.metadata_revision"><code class="xref">pipelines.metadata_revision</code></a></td><td>
<em></em></td></tr>
<tr class="pcap"><td></td><td>&#160;</td><td></td></tr>
<tr class="cap" id="cap-s"><td></td><td>
<strong>s</strong></td><td></td></tr>
<tr>
<td><img src="_static/minus.png" class="toggler"
id="toggle-2" style="display: none" alt="-" /></td>
<td>
<code class="xref">src</code></td><td>
<em></em></td></tr>
<tr class="cg-2">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/src.html#module-src.hdf5_ops"><code class="xref">src.hdf5_ops</code></a></td><td>
<em></em></td></tr>
<tr class="cg-2">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/src.html#module-src.hdf5_vis"><code class="xref">src.hdf5_vis</code></a></td><td>
<em></em></td></tr>
<tr class="cg-2">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/src.html#module-src.hdf5_writer"><code class="xref">src.hdf5_writer</code></a></td><td>
<em></em></td></tr>
<tr class="pcap"><td></td><td>&#160;</td><td></td></tr>
<tr class="cap" id="cap-u"><td></td><td>
<strong>u</strong></td><td></td></tr>
<tr>
<td><img src="_static/minus.png" class="toggler"
id="toggle-3" style="display: none" alt="-" /></td>
<td>
<code class="xref">utils</code></td><td>
<em></em></td></tr>
<tr class="cg-3">
<td></td>
<td>&#160;&#160;&#160;
<a href="modules/utils.html#module-utils.g5505_utils"><code class="xref">utils.g5505_utils</code></a></td><td>
<em></em></td></tr>
</table>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>

128
docs/build/html/search.html vendored Normal file
View File

@@ -0,0 +1,128 @@
<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; DIMA 1.0.0 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
<link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=19f00094" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script src="_static/jquery.js?v=5d32c60e"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
<script src="_static/documentation_options.js?v=8d563738"></script>
<script src="_static/doctools.js?v=9a2dae69"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script src="_static/js/theme.js"></script>
<script src="_static/searchtools.js"></script>
<script src="_static/language_data.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="#" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home">
DIMA
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="#" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html">HDF5 Data Operations</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_writer">HDF5 Writer</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/src.html#module-src.hdf5_vis">Data Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/pipelines.html">Pipelines and workflows</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/utils.html">Data Structure Conversion</a></li>
<li class="toctree-l1"><a class="reference internal" href="modules/notebooks.html">Notebooks</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">DIMA</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Search</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<noscript>
<div id="fallback" class="admonition warning">
<p class="last">
Please activate JavaScript to enable the search functionality.
</p>
</div>
</noscript>
<div id="search-results">
</div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2024, JFFO.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script>
jQuery(function() { Search.loadIndex("searchindex.js"); });
</script>
<script id="searchindexloader"></script>
</body>
</html>

1
docs/build/html/searchindex.js vendored Normal file

File diff suppressed because one or more lines are too long

35
docs/make.bat Normal file
View File

@@ -0,0 +1,35 @@
@ECHO OFF

REM Command file for Sphinx documentation.
REM Pass the name of a Sphinx builder as the first argument, for example html.
REM Run without arguments to print the list of available builders.

REM Work from the directory that contains this script so the relative
REM SOURCEDIR and BUILDDIR values below resolve no matter where it is called from.
pushd %~dp0

REM Honour a SPHINXBUILD value supplied by the environment, otherwise
REM fall back to the sphinx-build executable found on PATH.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

REM Probe for the executable. The errorlevel 9009 check below is the
REM command-not-found case.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Undo the pushd above before leaving so the caller is returned
	REM to the directory it started in.
	popd
	exit /b 1
)

if "%1" == "" goto help

REM Route the requested builder to Sphinx make mode. SPHINXOPTS and O
REM are optional extra arguments taken from the environment.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

55
docs/source/conf.py Normal file
View File

@@ -0,0 +1,55 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import sys

# Put the repository root (two levels above this file's directory) on the
# import path so autodoc can import the packages named in the ``automodule``
# directives (``src``, ``pipelines``, ``utils``, ``notebooks``).
# The relative path is resolved against the current working directory, which
# Sphinx sets to the configuration directory while it reads this file.
sys.path.insert(0, os.path.abspath('../..'))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'DIMA'
copyright = '2024, JFFO'
author = 'JFFO'
release = '1.0.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    'nbsphinx',                # render Jupyter notebooks as documentation pages
    'sphinx.ext.autodoc',      # pull API documentation from docstrings
    'sphinx.ext.autosummary',  # generate summary tables for documented objects
    'sphinx.ext.napoleon',     # parse NumPy/Google style "Parameters"/"Returns" sections
    'sphinx.ext.viewcode',     # add links to highlighted source code
]

templates_path = ['_templates']
exclude_patterns = []

# -- nbsphinx configuration --------------------------------------------------

# Keep building even if a notebook cell raised an error.
nbsphinx_allow_errors = True

# Never execute notebooks during the build; pages use the outputs already
# stored in the .ipynb files. Set to 'always' to re-run every notebook.
nbsphinx_execute = 'never'

# reStructuredText inserted at the beginning of every notebook page.
# The raw directive needs a blank line followed by an indented body.
# NOTE(review): the "nbsphinx-separator" class is presumably styled by custom
# CSS; no such stylesheet is visible here -- confirm it exists.
nbsphinx_prolog = """
.. raw:: html

    <div class="nbsphinx-separator"></div>
"""

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "sphinx_rtd_theme"
html_static_path = ['_static']

26
docs/source/index.rst Normal file
View File

@@ -0,0 +1,26 @@
.. DIMA documentation master file, created by
   sphinx-quickstart on Wed Jul 10 15:50:06 2024.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to DIMA's documentation!
================================
.. toctree::
   :maxdepth: 2
   :caption: Contents:

   modules/src
   modules/pipelines
   modules/utils
   modules/notebooks

Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

View File

@@ -0,0 +1,7 @@
Notebooks
==========================

.. automodule:: notebooks
   :members:
   :undoc-members:
   :show-inheritance:

View File

@@ -0,0 +1,12 @@
Pipelines and workflows
==========================

.. automodule:: pipelines.data_integration
   :members:
   :undoc-members:
   :show-inheritance:

.. automodule:: pipelines.metadata_revision
   :members:
   :undoc-members:
   :show-inheritance:

View File

@@ -0,0 +1,27 @@
HDF5 Data Operations
==========================

.. automodule:: src.hdf5_ops
   :members:
   :undoc-members:
   :show-inheritance:

HDF5 Writer
==========================

.. automodule:: src.hdf5_writer
   :members:
   :undoc-members:
   :show-inheritance:

Data Visualization
==================

.. automodule:: src.hdf5_vis
   :members:
   :undoc-members:
   :show-inheritance:

View File

@@ -0,0 +1,7 @@
Data Structure Conversion
=========================

.. automodule:: utils.g5505_utils
   :members:
   :undoc-members:
   :show-inheritance: