From 16a47cf3b3ace41e0412d86e6f63309e7f3c8ab8 Mon Sep 17 00:00:00 2001 From: Ivan Usov Date: Tue, 9 Feb 2021 13:07:51 +0100 Subject: [PATCH] Refactor dataset merging procedure --- pyzebra/__init__.py | 2 +- pyzebra/app/panel_ccl_integrate.py | 16 +- pyzebra/app/panel_param_study.py | 23 +-- pyzebra/ccl_process.py | 64 +++++++ pyzebra/merge_function.py | 268 ----------------------------- 5 files changed, 88 insertions(+), 285 deletions(-) create mode 100644 pyzebra/ccl_process.py delete mode 100644 pyzebra/merge_function.py diff --git a/pyzebra/__init__.py b/pyzebra/__init__.py index 3fdb57a..06a3edb 100644 --- a/pyzebra/__init__.py +++ b/pyzebra/__init__.py @@ -3,7 +3,7 @@ from pyzebra.ccl_findpeaks import ccl_findpeaks from pyzebra.ccl_io import export_1D, load_1D, parse_1D from pyzebra.fit2 import fitccl from pyzebra.h5 import * -from pyzebra.merge_function import add_dict, normalize_all, unified_merge from pyzebra.xtal import * +from pyzebra.ccl_process import normalize_dataset, merge_duplicates, merge_datasets __version__ = "0.2.2" diff --git a/pyzebra/app/panel_ccl_integrate.py b/pyzebra/app/panel_ccl_integrate.py index 15f0d4b..c95f235 100644 --- a/pyzebra/app/panel_ccl_integrate.py +++ b/pyzebra/app/panel_ccl_integrate.py @@ -110,7 +110,9 @@ def create(): with open(file_select.value) as file: _, ext = os.path.splitext(file_select.value) det_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) + + pyzebra.normalize_dataset(det_data) + pyzebra.merge_duplicates(det_data) _init_datatable() @@ -121,9 +123,9 @@ def create(): with open(file_select.value) as file: _, ext = os.path.splitext(file_select.value) append_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) - pyzebra.unified_merge(det_data, append_data) + pyzebra.normalize_dataset(append_data) + pyzebra.merge_datasets(det_data, append_data) _init_datatable() @@ -135,7 +137,9 @@ def create(): with io.StringIO(base64.b64decode(new).decode()) as file: _, ext = os.path.splitext(upload_button.filename) det_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) + + pyzebra.normalize_dataset(det_data) + pyzebra.merge_duplicates(det_data) _init_datatable() @@ -148,9 +152,9 @@ def create(): with io.StringIO(base64.b64decode(new).decode()) as file: _, ext = os.path.splitext(append_upload_button.filename) append_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) - pyzebra.unified_merge(det_data, append_data) + pyzebra.normalize_dataset(append_data) + pyzebra.merge_datasets(det_data, append_data) _init_datatable() diff --git a/pyzebra/app/panel_param_study.py b/pyzebra/app/panel_param_study.py index aaa9bca..fa7488a 100644 --- a/pyzebra/app/panel_param_study.py +++ b/pyzebra/app/panel_param_study.py @@ -75,7 +75,7 @@ def color_palette(n_colors): def create(): - det_data = {} + det_data = [] fit_params = {} peak_pos_textinput_lock = False js_data = { @@ -124,7 +124,8 @@ def create(): with open(file_select.value) as file: _, ext = os.path.splitext(file_select.value) det_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) + + pyzebra.normalize_dataset(det_data) _init_datatable() @@ -135,8 +136,9 @@ def create(): with open(file_select.value) as file: _, ext = os.path.splitext(file_select.value) append_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) - pyzebra.add_dict(det_data, append_data) + + pyzebra.normalize_dataset(append_data) + det_data.extend(append_data) _init_datatable() @@ -145,17 +147,17 @@ def create(): def upload_button_callback(_attr, _old, new): nonlocal det_data - det_data = {} + det_data = [] for f_str, f_name in zip(new, upload_button.filename): with io.StringIO(base64.b64decode(f_str).decode()) as file: _, ext = os.path.splitext(f_name) if det_data: append_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) - pyzebra.add_dict(det_data, append_data) + pyzebra.normalize_dataset(append_data) + det_data.extend(append_data) else: det_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) + pyzebra.normalize_dataset(det_data) _init_datatable() @@ -168,8 +170,9 @@ def create(): with io.StringIO(base64.b64decode(f_str).decode()) as file: _, ext = os.path.splitext(f_name) append_data = pyzebra.parse_1D(file, ext) - pyzebra.normalize_all(det_data) - pyzebra.add_dict(det_data, append_data) + + pyzebra.normalize_dataset(append_data) + det_data.extend(append_data) _init_datatable() diff --git a/pyzebra/ccl_process.py b/pyzebra/ccl_process.py new file mode 100644 index 0000000..555f210 --- /dev/null +++ b/pyzebra/ccl_process.py @@ -0,0 +1,64 @@ +import itertools + +import numpy as np + +from .ccl_io import CCL_ANGLES + +PARAM_PRECISIONS = { + "twotheta": 0.1, + "chi": 0.1, + "nu": 0.1, + "phi": 0.05, + "omega": 5, + "gamma": 0.05, + "temp": 1, + "mf": 0.001, + "ub": 0.01, +} + + +def normalize_dataset(dataset, monitor=100_000): + for scan in dataset: + monitor_ratio = monitor / scan["monitor"] + scan["Counts"] *= monitor_ratio + scan["monitor"] = monitor + + +def merge_duplicates(dataset): + for scan_i, scan_j in itertools.combinations(dataset, 2): + if _parameters_match(scan_i, scan_j): + _merge_scans(scan_i, scan_j) + + +def _parameters_match(scan1, scan2): + zebra_mode = scan1["zebra_mode"] + if zebra_mode != scan2["zebra_mode"]: + return False + + for param in ("ub", "temp", "mf", *(vars[0] for vars in CCL_ANGLES[zebra_mode])): + if np.max(np.abs(scan1[param] - scan2[param])) > PARAM_PRECISIONS[param]: + return False + + return True + + +def merge_datasets(dataset1, dataset2): + for scan_j in dataset2: + for scan_i in dataset1: + if _parameters_match(scan_i, scan_j): + _merge_scans(scan_i, scan_j) + break + else: + dataset1.append(scan_j) + + +def _merge_scans(scan1, scan2): + om = np.concatenate((scan1["om"], scan2["om"])) + counts = np.concatenate((scan1["Counts"], scan2["Counts"])) + + index = np.argsort(om) + + scan1["om"] = om[index] + scan1["Counts"] = counts[index] + + print(f'Scan {scan2["idx"]} merged into {scan1["idx"]}') diff --git a/pyzebra/merge_function.py b/pyzebra/merge_function.py deleted file mode 100644 index 17cd9a4..0000000 --- a/pyzebra/merge_function.py +++ /dev/null @@ -1,268 +0,0 @@ -import numpy as np -import uncertainties as u - - -def create_tuples(x, y, y_err): - """creates tuples for sorting and merginng of the data - Counts need to be normalized to monitor before""" - t = list() - for i in range(len(x)): - tup = (x[i], y[i], y_err[i]) - t.append(tup) - return t - - -def normalize_all(dictionary, monitor=100000): - for scan in dictionary: - counts = np.array(scan["Counts"]) - sigma = np.sqrt(counts) if "sigma" not in scan else scan["sigma"] - monitor_ratio = monitor / scan["monitor"] - scan["Counts"] = counts * monitor_ratio - scan["sigma"] = np.array(sigma) * monitor_ratio - scan["monitor"] = monitor - print("Normalized %d scans to monitor %d" % (len(dictionary), monitor)) - - -def merge(scan1, scan2): - """merges the two tuples and sorts them, if om value is same, Counts value is average - averaging is propagated into sigma if dict1 == dict2, key[1] is deleted after merging - :arg dict1 : dictionary to which measurement will be merged - :arg dict2 : dictionary from which measurement will be merged - :arg scand_dict_result : result of scan_dict after auto function - :arg keep : if true, when monitors are same, does not change it, if flase, takes monitor - always - :arg monitor : final monitor after merging - note: dict1 and dict2 can be same dict - :return dict1 with merged scan""" - - # load om and Counts - x1, x2 = scan1["om"], scan2["om"] - # print(scan1["om"]) - # print(scan2["om"]) - cor_y1, y_err1 = scan1["Counts"], scan1["sigma"] - cor_y2, y_err2 = scan2["Counts"], scan2["sigma"] - # creates touples (om, Counts, sigma) for sorting and further processing - tuple_list = create_tuples(x1, cor_y1, y_err1) + create_tuples(x2, cor_y2, y_err2) - # Sort the list on om and add 0 0 0 tuple to the last position - sorted_t = sorted(tuple_list, key=lambda tup: tup[0]) - sorted_t.append((0, 0, 0)) - om, Counts, sigma = [], [], [] - seen = list() - for i in range(len(sorted_t) - 1): - if sorted_t[i][0] not in seen: - if sorted_t[i][0] != sorted_t[i + 1][0]: - om = np.append(om, sorted_t[i][0]) - Counts = np.append(Counts, sorted_t[i][1]) - sigma = np.append(sigma, sorted_t[i][2]) - else: - om = np.append(om, sorted_t[i][0]) - counts1, counts2 = sorted_t[i][1], sorted_t[i + 1][1] - sigma1, sigma2 = sorted_t[i][2], sorted_t[i + 1][2] - count_err1 = u.ufloat(counts1, sigma1) - count_err2 = u.ufloat(counts2, sigma2) - avg = (count_err1 + count_err2) / 2 - Counts = np.append(Counts, avg.n) - sigma = np.append(sigma, avg.s) - seen.append(sorted_t[i][0]) - else: - continue - scan1["om"] = om - scan1["Counts"] = Counts - scan1["sigma"] = sigma - if "history" not in scan1: - scan1["history"] = str("Merged with scan %d" % scan2["idx"]) - else: - scan1["history"] = scan1["history"] + str(", merged with scan %d" % scan2["idx"]) - print("merging done") - - -def check_UB(dict1, dict2, precision=0.01): - return np.max(np.abs(dict1[0]["ub"] - dict2[0]["ub"])) < precision - - -def check_zebramode(dict1, dict2): - if dict1[0]["zebra_mode"] == dict2[0]["zebra_mode"]: - return True - else: - return False - - -def check_angles(scan1, scan2, angles, precision): - truth_list = list() - for item in angles: - if abs(abs(scan1[item]) - abs(scan2[item])) <= precision[item]: - truth_list.append(True) - else: - truth_list.append(False) - if all(truth_list): - return True - else: - return False - - -def check_temp_mag(scan1, scan2): - temp_diff = 1 - mag_diff = 0.001 - truth_list = list() - try: - if abs(abs(scan1["mf"]) - abs(scan2["mf"])) <= mag_diff: - truth_list.append(True) - else: - truth_list.append(False) - except KeyError: - print("Magnetic field is missing") - - try: - if abs(abs(scan1["temp"]) - abs(scan2["temp"])) <= temp_diff: - truth_list.append(True) - else: - truth_list.append(False) - except KeyError: - print("temperature missing") - - if all(truth_list): - return True - else: - return False - - -def merge_dups(dictionary): - - if dictionary[0]["data_type"] == "dat": - return - - if dictionary[0]["zebra_mode"] == "bi": - angles = ["twotheta", "omega", "chi", "phi"] - elif dictionary[0]["zebra_mode"] == "nb": - angles = ["gamma", "omega", "nu"] - - precision = { - "twotheta": 0.1, - "chi": 0.1, - "nu": 0.1, - "phi": 0.05, - "omega": 5, - "gamma": 0.05, - } - - for i in range(len(dictionary)): - for j in range(len(dictionary)): - if i == j: - continue - else: - # print(i, j) - if check_angles(dictionary[i], dictionary[j], angles, precision) and check_temp_mag( - dictionary[i], dictionary[j] - ): - merge(dictionary[i], dictionary[j]) - print("merged %d with %d within the dictionary" % (i, j)) - - del dictionary[j] - merge_dups(dictionary) - break - else: - continue - break - - -def add_scan(dict1, dict2, scan_to_add): - dict1.append(dict2[scan_to_add]) - del dict2[scan_to_add] - - -def process(dict1, dict2, angles, precision): - # stop when the second dict is empty - if dict2: - # check UB matrixes - if check_UB(dict1, dict2): - # iterate over second dict and check for matches - for i in range(len(dict2)): - for j in range(len(dict1)): - if check_angles(dict1[j], dict2[i], angles, precision): - # angles good, see the mag and temp - if check_temp_mag(dict1[j], dict2[i]): - merge(dict1[j], dict2[i]) - print("merged %d with %d from different dictionaries" % (i, j)) - del dict2[i] - process(dict1, dict2, angles, precision) - break - else: - add_scan(dict1, dict2, i) - print("Diffrent T or M, scan added") - process(dict1, dict2, angles, precision) - break - else: - add_scan(dict1, dict2, i) - print("Mismatch in angles, scan added") - process(dict1, dict2, angles, precision) - break - else: - continue - break - - else: - # ask user if he really wants to add - print("UBs are different, do you really wish to add datasets? Y/N") - dict1 = add_dict(dict1, dict2) - return - - -""" - 1. check for bisecting or normal beam geometry in data files; select stt, om, chi, phi for bisecting; select stt, om, nu for normal beam - 2. in the ccl files, check for identical stt, chi and nu within 0.1 degree, and, at the same time, for identical om and phi within 0.05 degree; - 3. in the dat files, check for identical stt, chi and nu within 0.1 degree, and, at the same time, - for identical phi within 0.05 degree, and, at the same time, for identical om within 5 degree.""" - - -def unified_merge(dict1, dict2): - if not check_zebramode(dict1, dict2): - print("You are trying to add two files with different zebra mdoe") - return - - # decide angles - if dict1[0]["zebra_mode"] == "bi": - angles = ["twotheta", "omega", "chi", "phi"] - elif dict1[0]["zebra_mode"] == "nb": - angles = ["gamma", "omega", "nu"] - - # precision of angles to check - precision = { - "twotheta": 0.1, - "chi": 0.1, - "nu": 0.1, - "phi": 0.05, - "omega": 5, - "gamma": 0.1, - } - if (dict1[0]["data_type"] == "ccl") and (dict2[0]["data_type"] == "ccl"): - precision["omega"] = 0.05 - - process(dict1, dict2, angles, precision) - - -def add_dict(dict1, dict2): - """adds two dictionaries, meta of the new is saved as meata+original_filename and - measurements are shifted to continue with numbering of first dict - :arg dict1 : dictionarry to add to - :arg dict2 : dictionarry from which to take the measurements - :return dict1 : combined dictionary - Note: dict1 must be made from ccl, otherwise we would have to change the structure of loaded - dat file""" - try: - if dict1[0]["zebra_mode"] != dict2[0]["zebra_mode"]: - print("You are trying to add scans measured with different zebra modes") - return - # this is for the qscan case - except KeyError: - print("Zebra mode not specified") - - for s in dict2: - if s not in dict1: - dict1.append(s) - - else: - print( - "The file %s has alredy been added to %s" - % (dict2[0]["original_filename"], dict1[0]["original_filename"]) - ) - return dict1