Refactor dataset merging procedure
parent e3de0f7217
commit 16a47cf3b3
@@ -3,7 +3,7 @@ from pyzebra.ccl_findpeaks import ccl_findpeaks
 from pyzebra.ccl_io import export_1D, load_1D, parse_1D
 from pyzebra.fit2 import fitccl
 from pyzebra.h5 import *
-from pyzebra.merge_function import add_dict, normalize_all, unified_merge
+from pyzebra.ccl_process import normalize_dataset, merge_duplicates, merge_datasets
 from pyzebra.xtal import *

 __version__ = "0.2.2"
@@ -110,7 +110,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+            pyzebra.normalize_dataset(det_data)
+            pyzebra.merge_duplicates(det_data)

         _init_datatable()
@@ -121,9 +123,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
-            pyzebra.unified_merge(det_data, append_data)
+            pyzebra.normalize_dataset(append_data)
+            pyzebra.merge_datasets(det_data, append_data)

         _init_datatable()
@@ -135,7 +137,9 @@ def create():
         with io.StringIO(base64.b64decode(new).decode()) as file:
             _, ext = os.path.splitext(upload_button.filename)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+            pyzebra.normalize_dataset(det_data)
+            pyzebra.merge_duplicates(det_data)

         _init_datatable()
@@ -148,9 +152,9 @@ def create():
         with io.StringIO(base64.b64decode(new).decode()) as file:
             _, ext = os.path.splitext(append_upload_button.filename)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
-            pyzebra.unified_merge(det_data, append_data)
+            pyzebra.normalize_dataset(append_data)
+            pyzebra.merge_datasets(det_data, append_data)

         _init_datatable()
@@ -75,7 +75,7 @@ def color_palette(n_colors):


 def create():
-    det_data = {}
+    det_data = []
     fit_params = {}
     peak_pos_textinput_lock = False
     js_data = {
@@ -124,7 +124,8 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+            pyzebra.normalize_dataset(det_data)

         _init_datatable()
@@ -135,8 +136,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
-            pyzebra.add_dict(det_data, append_data)
+            pyzebra.normalize_dataset(append_data)
+            det_data.extend(append_data)

         _init_datatable()
@@ -145,17 +147,17 @@ def create():

     def upload_button_callback(_attr, _old, new):
         nonlocal det_data
-        det_data = {}
+        det_data = []
         for f_str, f_name in zip(new, upload_button.filename):
             with io.StringIO(base64.b64decode(f_str).decode()) as file:
                 _, ext = os.path.splitext(f_name)
                 if det_data:
                     append_data = pyzebra.parse_1D(file, ext)
-                    pyzebra.normalize_all(det_data)
-                    pyzebra.add_dict(det_data, append_data)
+                    pyzebra.normalize_dataset(append_data)
+                    det_data.extend(append_data)
                 else:
                     det_data = pyzebra.parse_1D(file, ext)
-                    pyzebra.normalize_all(det_data)
+                    pyzebra.normalize_dataset(det_data)

         _init_datatable()
@@ -168,8 +170,9 @@ def create():
             with io.StringIO(base64.b64decode(f_str).decode()) as file:
                 _, ext = os.path.splitext(f_name)
                 append_data = pyzebra.parse_1D(file, ext)
-                pyzebra.normalize_all(det_data)
-                pyzebra.add_dict(det_data, append_data)
+                pyzebra.normalize_dataset(append_data)
+                det_data.extend(append_data)

         _init_datatable()
pyzebra/ccl_process.py (new file, 64 lines)
@@ -0,0 +1,64 @@
+import itertools
+
+import numpy as np
+
+from .ccl_io import CCL_ANGLES
+
+PARAM_PRECISIONS = {
+    "twotheta": 0.1,
+    "chi": 0.1,
+    "nu": 0.1,
+    "phi": 0.05,
+    "omega": 5,
+    "gamma": 0.05,
+    "temp": 1,
+    "mf": 0.001,
+    "ub": 0.01,
+}
+
+
+def normalize_dataset(dataset, monitor=100_000):
+    for scan in dataset:
+        monitor_ratio = monitor / scan["monitor"]
+        scan["Counts"] *= monitor_ratio
+        scan["monitor"] = monitor
+
+
+def merge_duplicates(dataset):
+    for scan_i, scan_j in itertools.combinations(dataset, 2):
+        if _parameters_match(scan_i, scan_j):
+            _merge_scans(scan_i, scan_j)
+
+
+def _parameters_match(scan1, scan2):
+    zebra_mode = scan1["zebra_mode"]
+    if zebra_mode != scan2["zebra_mode"]:
+        return False
+
+    for param in ("ub", "temp", "mf", *(vars[0] for vars in CCL_ANGLES[zebra_mode])):
+        if np.max(np.abs(scan1[param] - scan2[param])) > PARAM_PRECISIONS[param]:
+            return False
+
+    return True
+
+
+def merge_datasets(dataset1, dataset2):
+    for scan_j in dataset2:
+        for scan_i in dataset1:
+            if _parameters_match(scan_i, scan_j):
+                _merge_scans(scan_i, scan_j)
+                break
+        else:
+            dataset1.append(scan_j)
+
+
+def _merge_scans(scan1, scan2):
+    om = np.concatenate((scan1["om"], scan2["om"]))
+    counts = np.concatenate((scan1["Counts"], scan2["Counts"]))
+
+    index = np.argsort(om)
+
+    scan1["om"] = om[index]
+    scan1["Counts"] = counts[index]
+
+    print(f'Scan {scan2["idx"]} merged into {scan1["idx"]}')
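
The new module reduces the merging procedure to three calls: normalize to a common monitor, merge duplicates within a dataset, and merge a second dataset into the first. A minimal usage sketch, mirroring the panel callbacks above (the file names are hypothetical; parse_1D and the scan-dict layout with "om", "Counts" and "monitor" keys are taken from the code shown in this commit):

    import os

    import pyzebra

    # load a dataset as a list of scan dicts (hypothetical file name)
    fname = "example.ccl"
    with open(fname) as file:
        dataset = pyzebra.parse_1D(file, os.path.splitext(fname)[1])

    pyzebra.normalize_dataset(dataset)  # rescale Counts to monitor=100_000
    pyzebra.merge_duplicates(dataset)   # merge scans whose angles/temp/mf/ub match

    # append a second file: scans with matching parameters are merged,
    # the rest are appended to the first dataset
    fname2 = "example2.ccl"
    with open(fname2) as file:
        append_data = pyzebra.parse_1D(file, os.path.splitext(fname2)[1])
    pyzebra.normalize_dataset(append_data)
    pyzebra.merge_datasets(dataset, append_data)
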
pyzebra/merge_function.py (deleted, 268 lines; the module removed from the pyzebra imports above)
@@ -1,268 +0,0 @@
-import numpy as np
-import uncertainties as u
-
-
-def create_tuples(x, y, y_err):
-    """creates tuples for sorting and merginng of the data
-    Counts need to be normalized to monitor before"""
-    t = list()
-    for i in range(len(x)):
-        tup = (x[i], y[i], y_err[i])
-        t.append(tup)
-    return t
-
-
-def normalize_all(dictionary, monitor=100000):
-    for scan in dictionary:
-        counts = np.array(scan["Counts"])
-        sigma = np.sqrt(counts) if "sigma" not in scan else scan["sigma"]
-        monitor_ratio = monitor / scan["monitor"]
-        scan["Counts"] = counts * monitor_ratio
-        scan["sigma"] = np.array(sigma) * monitor_ratio
-        scan["monitor"] = monitor
-    print("Normalized %d scans to monitor %d" % (len(dictionary), monitor))
-
-
-def merge(scan1, scan2):
-    """merges the two tuples and sorts them, if om value is same, Counts value is average
-    averaging is propagated into sigma if dict1 == dict2, key[1] is deleted after merging
-    :arg dict1 : dictionary to which measurement will be merged
-    :arg dict2 : dictionary from which measurement will be merged
-    :arg scand_dict_result : result of scan_dict after auto function
-    :arg keep : if true, when monitors are same, does not change it, if flase, takes monitor
-    always
-    :arg monitor : final monitor after merging
-    note: dict1 and dict2 can be same dict
-    :return dict1 with merged scan"""
-
-    # load om and Counts
-    x1, x2 = scan1["om"], scan2["om"]
-    # print(scan1["om"])
-    # print(scan2["om"])
-    cor_y1, y_err1 = scan1["Counts"], scan1["sigma"]
-    cor_y2, y_err2 = scan2["Counts"], scan2["sigma"]
-    # creates touples (om, Counts, sigma) for sorting and further processing
-    tuple_list = create_tuples(x1, cor_y1, y_err1) + create_tuples(x2, cor_y2, y_err2)
-    # Sort the list on om and add 0 0 0 tuple to the last position
-    sorted_t = sorted(tuple_list, key=lambda tup: tup[0])
-    sorted_t.append((0, 0, 0))
-    om, Counts, sigma = [], [], []
-    seen = list()
-    for i in range(len(sorted_t) - 1):
-        if sorted_t[i][0] not in seen:
-            if sorted_t[i][0] != sorted_t[i + 1][0]:
-                om = np.append(om, sorted_t[i][0])
-                Counts = np.append(Counts, sorted_t[i][1])
-                sigma = np.append(sigma, sorted_t[i][2])
-            else:
-                om = np.append(om, sorted_t[i][0])
-                counts1, counts2 = sorted_t[i][1], sorted_t[i + 1][1]
-                sigma1, sigma2 = sorted_t[i][2], sorted_t[i + 1][2]
-                count_err1 = u.ufloat(counts1, sigma1)
-                count_err2 = u.ufloat(counts2, sigma2)
-                avg = (count_err1 + count_err2) / 2
-                Counts = np.append(Counts, avg.n)
-                sigma = np.append(sigma, avg.s)
-                seen.append(sorted_t[i][0])
-        else:
-            continue
-    scan1["om"] = om
-    scan1["Counts"] = Counts
-    scan1["sigma"] = sigma
-    if "history" not in scan1:
-        scan1["history"] = str("Merged with scan %d" % scan2["idx"])
-    else:
-        scan1["history"] = scan1["history"] + str(", merged with scan %d" % scan2["idx"])
-    print("merging done")
-
-
-def check_UB(dict1, dict2, precision=0.01):
-    return np.max(np.abs(dict1[0]["ub"] - dict2[0]["ub"])) < precision
-
-
-def check_zebramode(dict1, dict2):
-    if dict1[0]["zebra_mode"] == dict2[0]["zebra_mode"]:
-        return True
-    else:
-        return False
-
-
-def check_angles(scan1, scan2, angles, precision):
-    truth_list = list()
-    for item in angles:
-        if abs(abs(scan1[item]) - abs(scan2[item])) <= precision[item]:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    if all(truth_list):
-        return True
-    else:
-        return False
-
-
-def check_temp_mag(scan1, scan2):
-    temp_diff = 1
-    mag_diff = 0.001
-    truth_list = list()
-    try:
-        if abs(abs(scan1["mf"]) - abs(scan2["mf"])) <= mag_diff:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    except KeyError:
-        print("Magnetic field is missing")
-
-    try:
-        if abs(abs(scan1["temp"]) - abs(scan2["temp"])) <= temp_diff:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    except KeyError:
-        print("temperature missing")
-
-    if all(truth_list):
-        return True
-    else:
-        return False
-
-
-def merge_dups(dictionary):
-
-    if dictionary[0]["data_type"] == "dat":
-        return
-
-    if dictionary[0]["zebra_mode"] == "bi":
-        angles = ["twotheta", "omega", "chi", "phi"]
-    elif dictionary[0]["zebra_mode"] == "nb":
-        angles = ["gamma", "omega", "nu"]
-
-    precision = {
-        "twotheta": 0.1,
-        "chi": 0.1,
-        "nu": 0.1,
-        "phi": 0.05,
-        "omega": 5,
-        "gamma": 0.05,
-    }
-
-    for i in range(len(dictionary)):
-        for j in range(len(dictionary)):
-            if i == j:
-                continue
-            else:
-                # print(i, j)
-                if check_angles(dictionary[i], dictionary[j], angles, precision) and check_temp_mag(
-                    dictionary[i], dictionary[j]
-                ):
-                    merge(dictionary[i], dictionary[j])
-                    print("merged %d with %d within the dictionary" % (i, j))
-
-                    del dictionary[j]
-                    merge_dups(dictionary)
-                    break
-        else:
-            continue
-        break
-
-
-def add_scan(dict1, dict2, scan_to_add):
-    dict1.append(dict2[scan_to_add])
-    del dict2[scan_to_add]
-
-
-def process(dict1, dict2, angles, precision):
-    # stop when the second dict is empty
-    if dict2:
-        # check UB matrixes
-        if check_UB(dict1, dict2):
-            # iterate over second dict and check for matches
-            for i in range(len(dict2)):
-                for j in range(len(dict1)):
-                    if check_angles(dict1[j], dict2[i], angles, precision):
-                        # angles good, see the mag and temp
-                        if check_temp_mag(dict1[j], dict2[i]):
-                            merge(dict1[j], dict2[i])
-                            print("merged %d with %d from different dictionaries" % (i, j))
-                            del dict2[i]
-                            process(dict1, dict2, angles, precision)
-                            break
-                        else:
-                            add_scan(dict1, dict2, i)
-                            print("Diffrent T or M, scan added")
-                            process(dict1, dict2, angles, precision)
-                            break
-                    else:
-                        add_scan(dict1, dict2, i)
-                        print("Mismatch in angles, scan added")
-                        process(dict1, dict2, angles, precision)
-                        break
-                else:
-                    continue
-                break
-
-        else:
-            # ask user if he really wants to add
-            print("UBs are different, do you really wish to add datasets? Y/N")
-            dict1 = add_dict(dict1, dict2)
-            return
-
-
-"""
-1. check for bisecting or normal beam geometry in data files; select stt, om, chi, phi for bisecting; select stt, om, nu for normal beam
-2. in the ccl files, check for identical stt, chi and nu within 0.1 degree, and, at the same time, for identical om and phi within 0.05 degree;
-3. in the dat files, check for identical stt, chi and nu within 0.1 degree, and, at the same time,
-for identical phi within 0.05 degree, and, at the same time, for identical om within 5 degree."""
-
-
-def unified_merge(dict1, dict2):
-    if not check_zebramode(dict1, dict2):
-        print("You are trying to add two files with different zebra mdoe")
-        return
-
-    # decide angles
-    if dict1[0]["zebra_mode"] == "bi":
-        angles = ["twotheta", "omega", "chi", "phi"]
-    elif dict1[0]["zebra_mode"] == "nb":
-        angles = ["gamma", "omega", "nu"]
-
-    # precision of angles to check
-    precision = {
-        "twotheta": 0.1,
-        "chi": 0.1,
-        "nu": 0.1,
-        "phi": 0.05,
-        "omega": 5,
-        "gamma": 0.1,
-    }
-    if (dict1[0]["data_type"] == "ccl") and (dict2[0]["data_type"] == "ccl"):
-        precision["omega"] = 0.05
-
-    process(dict1, dict2, angles, precision)
-
-
-def add_dict(dict1, dict2):
-    """adds two dictionaries, meta of the new is saved as meata+original_filename and
-    measurements are shifted to continue with numbering of first dict
-    :arg dict1 : dictionarry to add to
-    :arg dict2 : dictionarry from which to take the measurements
-    :return dict1 : combined dictionary
-    Note: dict1 must be made from ccl, otherwise we would have to change the structure of loaded
-    dat file"""
-    try:
-        if dict1[0]["zebra_mode"] != dict2[0]["zebra_mode"]:
-            print("You are trying to add scans measured with different zebra modes")
-            return
-    # this is for the qscan case
-    except KeyError:
-        print("Zebra mode not specified")
-
-    for s in dict2:
-        if s not in dict1:
-            dict1.append(s)
-
-        else:
-            print(
-                "The file %s has alredy been added to %s"
-                % (dict2[0]["original_filename"], dict1[0]["original_filename"])
-            )
-    return dict1
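
One behavioral difference visible in this commit: the old merge() averaged Counts at coinciding om positions and propagated the uncertainty through the sigma arrays via the uncertainties package, while the new _merge_scans() concatenates the points and sorts by om. For reference, a minimal sketch of the averaging step the old code performed (the numbers here are invented for illustration):

    import uncertainties as u

    # two counts measured at the same om position, with their sigmas
    c1 = u.ufloat(100.0, 10.0)
    c2 = u.ufloat(120.0, 11.0)

    avg = (c1 + c2) / 2   # uncertainty propagates through the average
    print(avg.n, avg.s)   # nominal value 110.0, propagated sigma ~7.43
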