Update param_study_moduls.py

Updated create_dataframe and added a function called variables, which tries to decide which variables to plot in parametric studies and q scans. It works well for the primary variable (usually om) and reduces the secondary (slice) variables (temperature, magnetic field, ...) to a few candidates, from which one has to be picked. In one parametric-study data set it identified all parameters correctly; in a q scan, both the temperature and the H index varied, so technically either could be used, but only one makes sense and it will have to be picked by the user.
This commit is contained in:
JakHolzer 2020-10-30 11:45:24 +01:00 committed by GitHub
parent 0856705024
commit dbc643aba9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -7,10 +7,10 @@ import pandas as pd
import scipy.io as sio import scipy.io as sio
import uncertainties as u import uncertainties as u
from mpl_toolkits.mplot3d import Axes3D # dont delete, otherwise waterfall wont work from mpl_toolkits.mplot3d import Axes3D # dont delete, otherwise waterfall wont work
import collections
from .ccl_io import load_1D from .ccl_io import load_1D
def create_tuples(x, y, y_err): def create_tuples(x, y, y_err):
"""creates tuples for sorting and merginng of the data """creates tuples for sorting and merginng of the data
Counts need to be normalized to monitor before""" Counts need to be normalized to monitor before"""
@ -49,45 +49,45 @@ def load_dats(filepath):
if data_type == "txt": if data_type == "txt":
dict1 = add_dict(dict1, load_1D(file_list[i][0])) dict1 = add_dict(dict1, load_1D(file_list[i][0]))
else: else:
dict1 = add_dict(dict1, load_1D(file_list[i])) dict1 = add_dict(dict1, load_1D(file_list[i]))
dict1["scan"][i + 1]["params"] = {} dict1["scan"][i + 1]["params"] = {}
if data_type == "txt": if data_type == "txt":
for x in range(len(col_names) - 1): for x in range(len(col_names) - 1):
dict1["scan"][i + 1]["params"][col_names[x + 1]] = file_list[i][x + 1] dict1["scan"][i + 1]["params"][col_names[x + 1]] = float(file_list[i][x + 1])
return dict1 return dict1
def create_dataframe(dict1): def create_dataframe(dict1, variables):
"""Creates pandas dataframe from the dictionary """Creates pandas dataframe from the dictionary
:arg ccl like dictionary :arg ccl like dictionary
:return pandas dataframe""" :return pandas dataframe"""
# create dictionary to which we pull only wanted items before transforming it to pd.dataframe # create dictionary to which we pull only wanted items before transforming it to pd.dataframe
pull_dict = {} pull_dict = {}
pull_dict["filenames"] = list() pull_dict["filenames"] = list()
for key in dict1["scan"][1]["params"]: for keys in variables:
pull_dict[key] = list() for item in variables[keys]:
pull_dict["temperature"] = list() pull_dict[item] = list()
pull_dict["mag_field"] = list()
pull_dict["fit_area"] = list() pull_dict["fit_area"] = list()
pull_dict["int_area"] = list() pull_dict["int_area"] = list()
pull_dict["om"] = list()
pull_dict["Counts"] = list() pull_dict["Counts"] = list()
for keys in pull_dict:
print(keys)
# populate the dict # populate the dict
for keys in dict1["scan"]: for keys in dict1["scan"]:
if "file_of_origin" in dict1["scan"][keys]: if "file_of_origin" in dict1["scan"][keys]:
pull_dict["filenames"].append(dict1["scan"][keys]["file_of_origin"].split("/")[-1]) pull_dict["filenames"].append(dict1["scan"][keys]["file_of_origin"].split("/")[-1])
else: else:
pull_dict["filenames"].append(dict1["meta"]["original_filename"].split("/")[-1]) pull_dict["filenames"].append(dict1["meta"]["original_filename"].split("/")[-1])
for key in dict1["scan"][keys]["params"]:
pull_dict[str(key)].append(float(dict1["scan"][keys]["params"][key]))
pull_dict["temperature"].append(dict1["scan"][keys]["temperature"])
pull_dict["mag_field"].append(dict1["scan"][keys]["mag_field"])
pull_dict["fit_area"].append(dict1["scan"][keys]["fit"]["fit_area"]) pull_dict["fit_area"].append(dict1["scan"][keys]["fit"]["fit_area"])
pull_dict["int_area"].append(dict1["scan"][keys]["fit"]["int_area"]) pull_dict["int_area"].append(dict1["scan"][keys]["fit"]["int_area"])
pull_dict["om"].append(dict1["scan"][keys]["om"])
pull_dict["Counts"].append(dict1["scan"][keys]["Counts"]) pull_dict["Counts"].append(dict1["scan"][keys]["Counts"])
for key in variables:
for i in variables[key]:
pull_dict[i].append(_finditem(dict1["scan"][keys], i))
return pd.DataFrame(data=pull_dict) return pd.DataFrame(data=pull_dict)
@ -213,7 +213,8 @@ def save_table(data, filetype, name, path=None):
if filetype == "json": if filetype == "json":
data.to_json((path + name + ".json")) data.to_json((path + name + ".json"))
def normalize(dict, key, monitor):
def normalize(scan, monitor):
"""Normalizes the measurement to monitor, checks if sigma exists, otherwise creates it """Normalizes the measurement to monitor, checks if sigma exists, otherwise creates it
:arg dict : dictionary to from which to tkae the scan :arg dict : dictionary to from which to tkae the scan
:arg key : which scan to normalize from dict1 :arg key : which scan to normalize from dict1
@ -221,15 +222,16 @@ def normalize(dict, key, monitor):
:return counts - normalized counts :return counts - normalized counts
:return sigma - normalized sigma""" :return sigma - normalized sigma"""
counts = np.array(dict["scan"][key]["Counts"]) counts = np.array(scan["Counts"])
sigma = np.sqrt(counts) if "sigma" not in dict["scan"][key] else dict["scan"][key]["sigma"] sigma = np.sqrt(counts) if "sigma" not in scan else scan["sigma"]
monitor_ratio = monitor / dict["scan"][key]["monitor"] monitor_ratio = monitor / scan["monitor"]
scaled_counts = counts * monitor_ratio scaled_counts = counts * monitor_ratio
scaled_sigma = np.array(sigma) * monitor_ratio scaled_sigma = np.array(sigma) * monitor_ratio
return scaled_counts, scaled_sigma return scaled_counts, scaled_sigma
def merge(dict1, dict2, scand_dict_result, keep=True, monitor=100000):
def merge(scan1, scan2, keep=True, monitor=100000):
"""merges the two tuples and sorts them, if om value is same, Counts value is average """merges the two tuples and sorts them, if om value is same, Counts value is average
averaging is propagated into sigma if dict1 == dict2, key[1] is deleted after merging averaging is propagated into sigma if dict1 == dict2, key[1] is deleted after merging
:arg dict1 : dictionary to which measurement will be merged :arg dict1 : dictionary to which measurement will be merged
@ -240,18 +242,15 @@ def merge(dict1, dict2, scand_dict_result, keep=True, monitor=100000):
:arg monitor : final monitor after merging :arg monitor : final monitor after merging
note: dict1 and dict2 can be same dict note: dict1 and dict2 can be same dict
:return dict1 with merged scan""" :return dict1 with merged scan"""
for keys in scand_dict_result:
for j in range(len(scand_dict_result[keys])):
first, second = scand_dict_result[keys][j][0], scand_dict_result[keys][j][1]
print(first, second)
if keep: if keep:
if dict1["scan"][first]["monitor"] == dict2["scan"][second]["monitor"]: if scan1["monitor"] == scan2["monitor"]:
monitor = dict1["scan"][first]["monitor"] monitor = scan1["monitor"]
# load om and Counts # load om and Counts
x1, x2 = dict1["scan"][first]["om"], dict2["scan"][second]["om"] x1, x2 = scan1["om"], scan2["om"]
cor_y1, y_err1 = normalize(dict1, first, monitor=monitor) cor_y1, y_err1 = normalize(scan1, monitor=monitor)
cor_y2, y_err2 = normalize(dict2, second, monitor=monitor) cor_y2, y_err2 = normalize(scan2, monitor=monitor)
# creates touples (om, Counts, sigma) for sorting and further processing # creates touples (om, Counts, sigma) for sorting and further processing
tuple_list = create_tuples(x1, cor_y1, y_err1) + create_tuples(x2, cor_y2, y_err2) tuple_list = create_tuples(x1, cor_y1, y_err1) + create_tuples(x2, cor_y2, y_err2)
# Sort the list on om and add 0 0 0 tuple to the last position # Sort the list on om and add 0 0 0 tuple to the last position
@ -277,25 +276,11 @@ def merge(dict1, dict2, scand_dict_result, keep=True, monitor=100000):
seen.append(sorted_t[i][0]) seen.append(sorted_t[i][0])
else: else:
continue continue
scan1["om"] = om
if dict1 == dict2: scan1["Counts"] = Counts
del dict1["scan"][second] scan1["sigma"] = sigma
scan1["monitor"] = monitor
note = (
f"This measurement was merged with measurement {second} from "
f'file {dict2["meta"]["original_filename"]} \n'
)
if "notes" not in dict1["scan"][first]:
dict1["scan"][first]["notes"] = note
else:
dict1["scan"][first]["notes"] += note
dict1["scan"][first]["om"] = om
dict1["scan"][first]["Counts"] = Counts
dict1["scan"][first]["sigma"] = sigma
dict1["scan"][first]["monitor"] = monitor
print("merging done") print("merging done")
return dict1
def add_dict(dict1, dict2): def add_dict(dict1, dict2):
@ -306,9 +291,13 @@ def add_dict(dict1, dict2):
:return dict1 : combined dictionary :return dict1 : combined dictionary
Note: dict1 must be made from ccl, otherwise we would have to change the structure of loaded Note: dict1 must be made from ccl, otherwise we would have to change the structure of loaded
dat file""" dat file"""
try:
if dict1["meta"]["zebra_mode"] != dict2["meta"]["zebra_mode"]: if dict1["meta"]["zebra_mode"] != dict2["meta"]["zebra_mode"]:
print("You are trying to add scans measured with different zebra modes") print("You are trying to add scans measured with different zebra modes")
return return
# this is for the qscan case
except KeyError:
print("Zebra mode not specified")
max_measurement_dict1 = max([keys for keys in dict1["scan"]]) max_measurement_dict1 = max([keys for keys in dict1["scan"]])
new_filenames = np.arange( new_filenames = np.arange(
max_measurement_dict1 + 1, max_measurement_dict1 + 1 + len(dict2["scan"]) max_measurement_dict1 + 1, max_measurement_dict1 + 1 + len(dict2["scan"])
@ -371,6 +360,9 @@ def scan_dict(dict, precision=0.5):
itup.append(abs(abs(dict["scan"][i][k]) - abs(dict["scan"][j][k]))) itup.append(abs(abs(dict["scan"][i][k]) - abs(dict["scan"][j][k])))
if all(i <= precision for i in itup): if all(i <= precision for i in itup):
print(itup)
print([dict["scan"][i][k] for k in angles])
print([dict["scan"][j][k] for k in angles])
if str([np.around(dict["scan"][i][k], 0) for k in angles]) not in d: if str([np.around(dict["scan"][i][k], 0) for k in angles]) not in d:
d[str([np.around(dict["scan"][i][k], 0) for k in angles])] = list() d[str([np.around(dict["scan"][i][k], 0) for k in angles])] = list()
d[str([np.around(dict["scan"][i][k], 0) for k in angles])].append((i, j)) d[str([np.around(dict["scan"][i][k], 0) for k in angles])].append((i, j))
@ -382,4 +374,115 @@ def scan_dict(dict, precision=0.5):
else: else:
continue continue
return d return d
def _finditem(obj, key):
if key in obj:
return obj[key]
for k, v in obj.items():
if isinstance(v, dict):
item = _finditem(v, key)
if item is not None:
return item
def most_common(lst):
    """Return the element that occurs most often in ``lst``.

    :arg lst : iterable of hashable items
    :return the most frequent item; when several items share the highest
        count, the one that appears first in ``lst`` is returned
    :raises ValueError if ``lst`` is empty
    """
    # Counter tallies in a single pass (the previous max(set(lst), key=lst.count)
    # rescanned the list once per distinct element) and keeps first-seen order,
    # which makes the result for ties reproducible between runs.
    counts = collections.Counter(lst)
    if not counts:
        raise ValueError("most_common() arg is an empty sequence")
    return counts.most_common(1)[0][0]
def variables(dictionary):
    """Guess which variables should be used in a parametric study.

    The "primary" variable is the array-like one that changes within a scan
    (usually omega); the "secondary" variable is the slicing one, which is
    constant within a scan but differs from scan to scan (for example the
    temperature).

    :arg dictionary : ccl like dictionary; only ``dictionary["scan"]`` is read.
        Each scan may carry an optional ``"params"`` sub-dictionary whose keys
        are considered as well.
    :return dict with two lists of variable names:
        ``"primary"`` - names whose value is a list with a standard deviation
        above the threshold in every scan,
        ``"secondary"`` - names whose value is a float, an int or a nearly
        constant list in every scan, and whose per-scan average varies
        between the scans.
    Note: the outcome is also reported with print().
    """
    stdev_precision = 0.05
    scans = dictionary["scan"]
    n_scans = len(scans)

    # names that are never considered as plot variables
    wrong = {
        "NP",
        "Counts",
        "Monitor1",
        "Monitor2",
        "Monitor3",
        "h_index",
        "l_index",
        "k_index",
        "number_of_measurements",
        "monitor",
        "Time",
        "omega_angle",
        "twotheta_angle",
        "chi_angle",
        "phi_angle",
        "nu_angle",
    }

    # count in how many scans each variable name occurs
    occurrence = collections.Counter()
    for scan in scans.values():
        names = [key for key in scan if key != "params"]
        # "params" may be missing or empty
        names.extend(scan.get("params") or ())
        # count a name once per scan even if it is both a scan entry and a
        # parameter; dict.fromkeys removes duplicates but keeps the order
        occurrence.update(list(dict.fromkeys(names)))

    # keep the names present in all scans, minus the obviously wrong ones
    inall_red = [
        name for name, count in occurrence.items() if count == n_scans and name not in wrong
    ]

    # check for primary variable, needs to be a list that really varies; we dont
    # suspect the primary variable to be a parameter (be in scan[params])
    primary_hits = collections.Counter()
    for scan in scans.values():
        for name in inall_red:
            value = _finditem(scan, name)
            if isinstance(value, list) and np.std(value) > stdev_precision:
                primary_hits[name] += 1

    # check which of the primary candidates qualify in every scan
    second_round_primary_candidates = [
        name for name, hits in primary_hits.items() if hits == n_scans
    ]

    if len(second_round_primary_candidates) == 1:
        print("We've got a primary winner!", second_round_primary_candidates)
    else:
        print("Still not sure with primary:(", second_round_primary_candidates)

    # check for secondary variable, we suspect a float\int or not changing array
    # we dont need to check the primary ones again
    secondary_candidates = [
        name for name in inall_red if name not in second_round_primary_candidates
    ]

    secondary_hits = collections.Counter()
    for scan in scans.values():
        for name in secondary_candidates:
            value = _finditem(scan, name)
            if isinstance(value, (float, int)):
                secondary_hits[name] += 1
            elif isinstance(value, list) and np.std(value) < stdev_precision:
                secondary_hits[name] += 1

    second_round_secondary_candidates = [
        name for name, hits in secondary_hits.items() if hits == n_scans
    ]

    # now we check if they vary between the scans
    third_round_sec_candidates = []
    for name in second_round_secondary_candidates:
        per_scan_average = [np.average(_finditem(scan, name)) for scan in scans.values()]
        if np.std(per_scan_average) > stdev_precision:
            third_round_sec_candidates.append(name)

    if len(third_round_sec_candidates) == 1:
        print("We've got a secondary winner!", third_round_sec_candidates)
    else:
        print("Still not sure with secondary :(", third_round_sec_candidates)

    return {"primary": second_round_primary_candidates, "secondary": third_round_sec_candidates}