From 0febab42924f63c4635c403e0de8f32e843fa633 Mon Sep 17 00:00:00 2001
From: JakHolzer <53743814+JakHolzer@users.noreply.github.com>
Date: Tue, 13 Oct 2020 12:18:38 +0200
Subject: [PATCH] Modules for parametric study

Loading files from txt and list of filenames, creating and sorting the dataframe and making of the graphs
---
 pyzebra/param_study_moduls.py | 146 ++++++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 pyzebra/param_study_moduls.py

diff --git a/pyzebra/param_study_moduls.py b/pyzebra/param_study_moduls.py
new file mode 100644
index 0000000..b2f3e2d
--- /dev/null
+++ b/pyzebra/param_study_moduls.py
@@ -0,0 +1,146 @@
+from load_1Dnn import load_1D
+from ccl_dict_operation import add_dict
+import pandas as pd
+from mpl_toolkits.mplot3d import Axes3D  # dont delete, otherwise waterfall wont work
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import numpy as np
+
+
+def load_dats(filepath):
+    """reads the txt file, get headers and data
+    :arg filepath to txt file or list of filepaths to the files
+    :return ccl like dictionary"""
+    if isinstance(filepath, str):
+        data_type = "txt"
+        file_list = list()
+        with open(filepath, "r") as infile:
+            col_names = next(infile).split()
+            for line in infile:
+                if "END" in line:
+                    break
+                file_list.append(tuple(line.split()))
+    elif isinstance(filepath, list):
+        data_type = "list"
+        file_list = filepath
+
+    print(file_list)
+    dict1 = {}
+    for i in range(len(file_list)):
+        if not dict1:
+            if data_type == "txt":
+                dict1 = load_1D(file_list[0][0])
+            else:
+                dict1 = load_1D(file_list[0])
+        else:
+            if data_type == "txt":
+                dict1 = add_dict(dict1, load_1D(file_list[i][0]))
+            else:
+                dict1 = add_dict(dict1, load_1D(file_list[i]))
+        dict1["Measurements"][str("M" + str(i + 1))]["params"] = {}
+        if data_type == "txt":
+            for x in range(len(col_names) - 1):
+                dict1["Measurements"][str("M" + str(i + 1))]["params"][
+                    col_names[x + 1]
+                ] = file_list[i][x + 1]
+
+    return dict1
+
+
+def create_dataframe(dict1):
+    """Creates pandas dataframe from the dictionary
+    :arg ccl like dictionary
+    :return pandas dataframe"""
+    # create dictionary to which we pull only wanted items before transforming it to pd.dataframe
+    pull_dict = {}
+    pull_dict["filenames"] = list()
+    for key in dict1["Measurements"]["M1"]["params"]:
+        pull_dict[key] = list()
+    pull_dict["temperature"] = list()
+    pull_dict["mag_field"] = list()
+    pull_dict["fit_area"] = list()
+    pull_dict["int_area"] = list()
+    pull_dict["om"] = list()
+    pull_dict["Counts"] = list()
+
+    # populate the dict
+    for keys in dict1["Measurements"]:
+        if "file_of_origin" in dict1["Measurements"][keys]:
+            pull_dict["filenames"].append(
+                dict1["Measurements"][keys]["file_of_origin"].split("/")[-1]
+            )
+        else:
+            pull_dict["filenames"].append(dict1["meta"]["original_filename"].split("/")[-1])
+        for key in dict1["Measurements"][keys]["params"]:
+            pull_dict[str(key)].append(float(dict1["Measurements"][keys]["params"][key]))
+        pull_dict["temperature"].append(dict1["Measurements"][keys]["temperature"])
+        pull_dict["mag_field"].append(dict1["Measurements"][keys]["mag_field"])
+        pull_dict["fit_area"].append(dict1["Measurements"][keys]["fit"]["fit_area"])
+        pull_dict["int_area"].append(dict1["Measurements"][keys]["fit"]["int_area"])
+        pull_dict["om"].append(dict1["Measurements"][keys]["om"])
+        pull_dict["Counts"].append(dict1["Measurements"][keys]["Counts"])
+
+    return pd.DataFrame(data=pull_dict)
+
+
+def sort_dataframe(dataframe, sorting_parameter):
+    """sorts the data frame and resets index"""
+    data = dataframe.sort_values(by=sorting_parameter)
+    data = data.reset_index(drop=True)
+    return data
+
+
+def make_graph(data, sorting_parameter, style):
+    """Makes the graph from the data based on style and sorting parameter
+    :arg data : pandas dataframe with data after sorting
+    :arg sorting_parameter to pull the correct variable and name
+    :arg style of the graph - waterfall, scatter, heatmap
+    :return matplotlib figure"""
+    if style == "waterfall":
+        mpl.rcParams["legend.fontsize"] = 10
+        fig = plt.figure()
+        ax = fig.gca(projection="3d")
+        for i in range(len(data)):
+            x = data["om"][i]
+            z = data["Counts"][i]
+            yy = [data[sorting_parameter][i]] * len(x)
+            ax.plot(x, yy, z, label=str("%s = %f" % (sorting_parameter, yy[i])))
+
+        ax.legend()
+        ax.set_xlabel("Omega")
+        ax.set_ylabel(sorting_parameter)
+        ax.set_zlabel("counts")
+
+    elif style == "scatter":
+        fig = plt.figure()
+        plt.errorbar(
+            data[sorting_parameter],
+            [data["fit_area"][i].n for i in range(len(data["fit_area"]))],
+            [data["fit_area"][i].s for i in range(len(data["fit_area"]))],
+            capsize=5,
+            ecolor="green",
+        )
+        plt.xlabel(str(sorting_parameter))
+        plt.ylabel("Intesity")
+
+    elif style == "heat":
+        new_om = list()
+        for i in range(len(data)):
+            new_om = np.append(new_om, np.around(data["om"][i], 2), axis=0)
+        unique_om = np.unique(new_om)
+        color_matrix = np.zeros(shape=(len(data), len(unique_om)))
+        for i in range(len(data)):
+            for j in range(len(data["om"][i])):
+                if np.around(data["om"][i][j], 2) in np.unique(new_om):
+                    color_matrix[i, j] = data["Counts"][i][j]
+                else:
+                    continue
+
+        fig = plt.figure()
+        plt.pcolormesh(unique_om, data[sorting_parameter], color_matrix, shading="gouraud")
+        plt.xlabel("omega")
+        plt.ylabel(sorting_parameter)
+        plt.colorbar()
+        plt.clim(color_matrix.mean(), color_matrix.max())
+
+    return fig