Refactor dataset merging procedure
parent e3de0f7217
commit 16a47cf3b3
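The ad-hoc merge_function helpers (normalize_all, unified_merge, add_dict) are replaced by a leaner ccl_process module (normalize_dataset, merge_duplicates, merge_datasets) that operates on datasets held as lists of scans. A minimal sketch of the call sequence the updated callbacks use; file2/ext2 are hypothetical stand-ins for a second input stream and its extension:

    # first file: normalize counts, then merge scans taken at matching positions
    det_data = pyzebra.parse_1D(file, ext)
    pyzebra.normalize_dataset(det_data)
    pyzebra.merge_duplicates(det_data)

    # second file: normalize it, then fold it into the existing dataset;
    # matching scans are merged in place, all others are appended
    append_data = pyzebra.parse_1D(file2, ext2)  # file2/ext2: hypothetical second input
    pyzebra.normalize_dataset(append_data)
    pyzebra.merge_datasets(det_data, append_data)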
@@ -3,7 +3,7 @@ from pyzebra.ccl_findpeaks import ccl_findpeaks
 from pyzebra.ccl_io import export_1D, load_1D, parse_1D
 from pyzebra.fit2 import fitccl
 from pyzebra.h5 import *
-from pyzebra.merge_function import add_dict, normalize_all, unified_merge
 from pyzebra.xtal import *
+from pyzebra.ccl_process import normalize_dataset, merge_duplicates, merge_datasets
 
 __version__ = "0.2.2"
@@ -110,7 +110,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+
+            pyzebra.normalize_dataset(det_data)
+            pyzebra.merge_duplicates(det_data)
 
         _init_datatable()
 
@@ -121,9 +123,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
 
-            pyzebra.unified_merge(det_data, append_data)
+            pyzebra.normalize_dataset(append_data)
+            pyzebra.merge_datasets(det_data, append_data)
 
         _init_datatable()
 
@@ -135,7 +137,9 @@ def create():
         with io.StringIO(base64.b64decode(new).decode()) as file:
             _, ext = os.path.splitext(upload_button.filename)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+
+            pyzebra.normalize_dataset(det_data)
+            pyzebra.merge_duplicates(det_data)
 
         _init_datatable()
 
@@ -148,9 +152,9 @@ def create():
         with io.StringIO(base64.b64decode(new).decode()) as file:
             _, ext = os.path.splitext(append_upload_button.filename)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
 
-            pyzebra.unified_merge(det_data, append_data)
+            pyzebra.normalize_dataset(append_data)
+            pyzebra.merge_datasets(det_data, append_data)
 
         _init_datatable()
 

@@ -75,7 +75,7 @@ def color_palette(n_colors):
 
 
 def create():
-    det_data = {}
+    det_data = []
     fit_params = {}
     peak_pos_textinput_lock = False
     js_data = {
@@ -124,7 +124,8 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             det_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
+
+            pyzebra.normalize_dataset(det_data)
 
         _init_datatable()
 
@@ -135,8 +136,9 @@ def create():
         with open(file_select.value) as file:
             _, ext = os.path.splitext(file_select.value)
             append_data = pyzebra.parse_1D(file, ext)
-            pyzebra.normalize_all(det_data)
-            pyzebra.add_dict(det_data, append_data)
+
+            pyzebra.normalize_dataset(append_data)
+            det_data.extend(append_data)
 
         _init_datatable()
 
@@ -145,17 +147,17 @@ def create():
 
     def upload_button_callback(_attr, _old, new):
         nonlocal det_data
-        det_data = {}
+        det_data = []
         for f_str, f_name in zip(new, upload_button.filename):
             with io.StringIO(base64.b64decode(f_str).decode()) as file:
                 _, ext = os.path.splitext(f_name)
                 if det_data:
                     append_data = pyzebra.parse_1D(file, ext)
-                    pyzebra.normalize_all(det_data)
-                    pyzebra.add_dict(det_data, append_data)
+                    pyzebra.normalize_dataset(append_data)
+                    det_data.extend(append_data)
                 else:
                     det_data = pyzebra.parse_1D(file, ext)
-                    pyzebra.normalize_all(det_data)
+                    pyzebra.normalize_dataset(det_data)
 
         _init_datatable()
 
@@ -168,8 +170,9 @@ def create():
             with io.StringIO(base64.b64decode(f_str).decode()) as file:
                 _, ext = os.path.splitext(f_name)
                 append_data = pyzebra.parse_1D(file, ext)
-                pyzebra.normalize_all(det_data)
-                pyzebra.add_dict(det_data, append_data)
+
+                pyzebra.normalize_dataset(append_data)
+                det_data.extend(append_data)
 
         _init_datatable()
 

pyzebra/ccl_process.py  (new file, 64 lines)
@@ -0,0 +1,64 @@
+import itertools
+
+import numpy as np
+
+from .ccl_io import CCL_ANGLES
+
+PARAM_PRECISIONS = {
+    "twotheta": 0.1,
+    "chi": 0.1,
+    "nu": 0.1,
+    "phi": 0.05,
+    "omega": 5,
+    "gamma": 0.05,
+    "temp": 1,
+    "mf": 0.001,
+    "ub": 0.01,
+}
+
+
+def normalize_dataset(dataset, monitor=100_000):
+    for scan in dataset:
+        monitor_ratio = monitor / scan["monitor"]
+        scan["Counts"] *= monitor_ratio
+        scan["monitor"] = monitor
+
+
+def merge_duplicates(dataset):
+    for scan_i, scan_j in itertools.combinations(dataset, 2):
+        if _parameters_match(scan_i, scan_j):
+            _merge_scans(scan_i, scan_j)
+
+
+def _parameters_match(scan1, scan2):
+    zebra_mode = scan1["zebra_mode"]
+    if zebra_mode != scan2["zebra_mode"]:
+        return False
+
+    for param in ("ub", "temp", "mf", *(vars[0] for vars in CCL_ANGLES[zebra_mode])):
+        if np.max(np.abs(scan1[param] - scan2[param])) > PARAM_PRECISIONS[param]:
+            return False
+
+    return True
+
+
+def merge_datasets(dataset1, dataset2):
+    for scan_j in dataset2:
+        for scan_i in dataset1:
+            if _parameters_match(scan_i, scan_j):
+                _merge_scans(scan_i, scan_j)
+                break
+        else:
+            dataset1.append(scan_j)
+
+
+def _merge_scans(scan1, scan2):
+    om = np.concatenate((scan1["om"], scan2["om"]))
+    counts = np.concatenate((scan1["Counts"], scan2["Counts"]))
+
+    index = np.argsort(om)
+
+    scan1["om"] = om[index]
+    scan1["Counts"] = counts[index]
+
+    print(f'Scan {scan2["idx"]} merged into {scan1["idx"]}')
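For reference, _merge_scans concatenates the omega positions and counts of the two scans and re-sorts them by omega, so the result behaves like one longer scan; unlike the removed merge(), it keeps coinciding omega points side by side rather than averaging them, and it no longer carries a sigma array. A toy run, assuming hypothetical minimal scans that contain only the fields _merge_scans touches (real scans also carry zebra_mode, monitor, angles, etc.):

    import numpy as np

    # hypothetical minimal scans for illustration only
    scan1 = {"idx": 1, "om": np.array([1.0, 3.0]), "Counts": np.array([10.0, 30.0])}
    scan2 = {"idx": 2, "om": np.array([2.0, 4.0]), "Counts": np.array([20.0, 40.0])}

    _merge_scans(scan1, scan2)  # prints: Scan 2 merged into 1
    # scan1["om"] is now [1. 2. 3. 4.] and scan1["Counts"] is [10. 20. 30. 40.]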
pyzebra/merge_function.py  (deleted file, 268 lines)
@@ -1,268 +0,0 @@
-import numpy as np
-import uncertainties as u
-
-
-def create_tuples(x, y, y_err):
-    """creates tuples for sorting and merginng of the data
-    Counts need to be normalized to monitor before"""
-    t = list()
-    for i in range(len(x)):
-        tup = (x[i], y[i], y_err[i])
-        t.append(tup)
-    return t
-
-
-def normalize_all(dictionary, monitor=100000):
-    for scan in dictionary:
-        counts = np.array(scan["Counts"])
-        sigma = np.sqrt(counts) if "sigma" not in scan else scan["sigma"]
-        monitor_ratio = monitor / scan["monitor"]
-        scan["Counts"] = counts * monitor_ratio
-        scan["sigma"] = np.array(sigma) * monitor_ratio
-        scan["monitor"] = monitor
-    print("Normalized %d scans to monitor %d" % (len(dictionary), monitor))
-
-
-def merge(scan1, scan2):
-    """merges the two tuples and sorts them, if om value is same, Counts value is average
-    averaging is propagated into sigma if dict1 == dict2, key[1] is deleted after merging
-    :arg dict1 : dictionary to which measurement will be merged
-    :arg dict2 : dictionary from which measurement will be merged
-    :arg scand_dict_result : result of scan_dict after auto function
-    :arg keep : if true, when monitors are same, does not change it, if flase, takes monitor
-    always
-    :arg monitor : final monitor after merging
-    note: dict1 and dict2 can be same dict
-    :return dict1 with merged scan"""
-
-    # load om and Counts
-    x1, x2 = scan1["om"], scan2["om"]
-    # print(scan1["om"])
-    # print(scan2["om"])
-    cor_y1, y_err1 = scan1["Counts"], scan1["sigma"]
-    cor_y2, y_err2 = scan2["Counts"], scan2["sigma"]
-    # creates touples (om, Counts, sigma) for sorting and further processing
-    tuple_list = create_tuples(x1, cor_y1, y_err1) + create_tuples(x2, cor_y2, y_err2)
-    # Sort the list on om and add 0 0 0 tuple to the last position
-    sorted_t = sorted(tuple_list, key=lambda tup: tup[0])
-    sorted_t.append((0, 0, 0))
-    om, Counts, sigma = [], [], []
-    seen = list()
-    for i in range(len(sorted_t) - 1):
-        if sorted_t[i][0] not in seen:
-            if sorted_t[i][0] != sorted_t[i + 1][0]:
-                om = np.append(om, sorted_t[i][0])
-                Counts = np.append(Counts, sorted_t[i][1])
-                sigma = np.append(sigma, sorted_t[i][2])
-            else:
-                om = np.append(om, sorted_t[i][0])
-                counts1, counts2 = sorted_t[i][1], sorted_t[i + 1][1]
-                sigma1, sigma2 = sorted_t[i][2], sorted_t[i + 1][2]
-                count_err1 = u.ufloat(counts1, sigma1)
-                count_err2 = u.ufloat(counts2, sigma2)
-                avg = (count_err1 + count_err2) / 2
-                Counts = np.append(Counts, avg.n)
-                sigma = np.append(sigma, avg.s)
-                seen.append(sorted_t[i][0])
-        else:
-            continue
-    scan1["om"] = om
-    scan1["Counts"] = Counts
-    scan1["sigma"] = sigma
-    if "history" not in scan1:
-        scan1["history"] = str("Merged with scan %d" % scan2["idx"])
-    else:
-        scan1["history"] = scan1["history"] + str(", merged with scan %d" % scan2["idx"])
-    print("merging done")
-
-
-def check_UB(dict1, dict2, precision=0.01):
-    return np.max(np.abs(dict1[0]["ub"] - dict2[0]["ub"])) < precision
-
-
-def check_zebramode(dict1, dict2):
-    if dict1[0]["zebra_mode"] == dict2[0]["zebra_mode"]:
-        return True
-    else:
-        return False
-
-
-def check_angles(scan1, scan2, angles, precision):
-    truth_list = list()
-    for item in angles:
-        if abs(abs(scan1[item]) - abs(scan2[item])) <= precision[item]:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    if all(truth_list):
-        return True
-    else:
-        return False
-
-
-def check_temp_mag(scan1, scan2):
-    temp_diff = 1
-    mag_diff = 0.001
-    truth_list = list()
-    try:
-        if abs(abs(scan1["mf"]) - abs(scan2["mf"])) <= mag_diff:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    except KeyError:
-        print("Magnetic field is missing")
-
-    try:
-        if abs(abs(scan1["temp"]) - abs(scan2["temp"])) <= temp_diff:
-            truth_list.append(True)
-        else:
-            truth_list.append(False)
-    except KeyError:
-        print("temperature missing")
-
-    if all(truth_list):
-        return True
-    else:
-        return False
-
-
-def merge_dups(dictionary):
-
-    if dictionary[0]["data_type"] == "dat":
-        return
-
-    if dictionary[0]["zebra_mode"] == "bi":
-        angles = ["twotheta", "omega", "chi", "phi"]
-    elif dictionary[0]["zebra_mode"] == "nb":
-        angles = ["gamma", "omega", "nu"]
-
-    precision = {
-        "twotheta": 0.1,
-        "chi": 0.1,
-        "nu": 0.1,
-        "phi": 0.05,
-        "omega": 5,
-        "gamma": 0.05,
-    }
-
-    for i in range(len(dictionary)):
-        for j in range(len(dictionary)):
-            if i == j:
-                continue
-            else:
-                # print(i, j)
-                if check_angles(dictionary[i], dictionary[j], angles, precision) and check_temp_mag(
-                    dictionary[i], dictionary[j]
-                ):
-                    merge(dictionary[i], dictionary[j])
-                    print("merged %d with %d within the dictionary" % (i, j))
-
-                    del dictionary[j]
-                    merge_dups(dictionary)
-                    break
-        else:
-            continue
-        break
-
-
-def add_scan(dict1, dict2, scan_to_add):
-    dict1.append(dict2[scan_to_add])
-    del dict2[scan_to_add]
-
-
-def process(dict1, dict2, angles, precision):
-    # stop when the second dict is empty
-    if dict2:
-        # check UB matrixes
-        if check_UB(dict1, dict2):
-            # iterate over second dict and check for matches
-            for i in range(len(dict2)):
-                for j in range(len(dict1)):
-                    if check_angles(dict1[j], dict2[i], angles, precision):
-                        # angles good, see the mag and temp
-                        if check_temp_mag(dict1[j], dict2[i]):
-                            merge(dict1[j], dict2[i])
-                            print("merged %d with %d from different dictionaries" % (i, j))
-                            del dict2[i]
-                            process(dict1, dict2, angles, precision)
-                            break
-                        else:
-                            add_scan(dict1, dict2, i)
-                            print("Diffrent T or M, scan added")
-                            process(dict1, dict2, angles, precision)
-                            break
-                    else:
-                        add_scan(dict1, dict2, i)
-                        print("Mismatch in angles, scan added")
-                        process(dict1, dict2, angles, precision)
-                        break
-                else:
-                    continue
-                break
-
-        else:
-            # ask user if he really wants to add
-            print("UBs are different, do you really wish to add datasets? Y/N")
-            dict1 = add_dict(dict1, dict2)
-            return
-
-
-"""
-1. check for bisecting or normal beam geometry in data files; select stt, om, chi, phi for bisecting; select stt, om, nu for normal beam
-2. in the ccl files, check for identical stt, chi and nu within 0.1 degree, and, at the same time, for identical om and phi within 0.05 degree;
-3. in the dat files, check for identical stt, chi and nu within 0.1 degree, and, at the same time,
-for identical phi within 0.05 degree, and, at the same time, for identical om within 5 degree."""
-
-
-def unified_merge(dict1, dict2):
-    if not check_zebramode(dict1, dict2):
-        print("You are trying to add two files with different zebra mdoe")
-        return
-
-    # decide angles
-    if dict1[0]["zebra_mode"] == "bi":
-        angles = ["twotheta", "omega", "chi", "phi"]
-    elif dict1[0]["zebra_mode"] == "nb":
-        angles = ["gamma", "omega", "nu"]
-
-    # precision of angles to check
-    precision = {
-        "twotheta": 0.1,
-        "chi": 0.1,
-        "nu": 0.1,
-        "phi": 0.05,
-        "omega": 5,
-        "gamma": 0.1,
-    }
-    if (dict1[0]["data_type"] == "ccl") and (dict2[0]["data_type"] == "ccl"):
-        precision["omega"] = 0.05
-
-    process(dict1, dict2, angles, precision)
-
-
-def add_dict(dict1, dict2):
-    """adds two dictionaries, meta of the new is saved as meata+original_filename and
-    measurements are shifted to continue with numbering of first dict
-    :arg dict1 : dictionarry to add to
-    :arg dict2 : dictionarry from which to take the measurements
-    :return dict1 : combined dictionary
-    Note: dict1 must be made from ccl, otherwise we would have to change the structure of loaded
-    dat file"""
-    try:
-        if dict1[0]["zebra_mode"] != dict2[0]["zebra_mode"]:
-            print("You are trying to add scans measured with different zebra modes")
-            return
-    # this is for the qscan case
-    except KeyError:
-        print("Zebra mode not specified")
-
-    for s in dict2:
-        if s not in dict1:
-            dict1.append(s)
-
-        else:
-            print(
-                "The file %s has alredy been added to %s"
-                % (dict2[0]["original_filename"], dict1[0]["original_filename"])
-            )
-    return dict1