517 KiB
517 KiB
Segment water in GDL to quantify influence of liquid water on electrochemistry¶
TODO:
- select only the data containing the GDL and channels
- set up ML and train on one sample
- apply classifier to test sample and segment all
- align samples with new cropping --> can be independent of membrane segmentation
- allow (de-)selection of features during training
- allow to add features after most others have been already calculated
In [1]:
# --- module imports ---
import os
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import dask
import dask.array
from scipy import ndimage
from skimage import filters, feature, io
from skimage.morphology import disk, ball
import sys
from itertools import combinations_with_replacement
import pickle
import imageio
import json
from dask.distributed import Client, LocalCluster
import socket
import subprocess
import gc
import h5py
from dask import config as cfg

# Workaround so that dask does not kill workers while they are busy fetching data:
# https://dask.discourse.group/t/dask-workers-killed-because-of-heartbeat-fail/856,
# maybe this helps: https://www.youtube.com/watch?v=vF2VItVU5zg?
cfg.set({'distributed.scheduler.worker-ttl': None,
         # potential workaround for ballooning scheduler memory
         # https://baumgartner.io/posts/how-to-reduce-memory-usage-of-dask-scheduler/
         'distributed.scheduler.transition-log-length': 100,
         'distributed.scheduler.events-log-length': 100
         })

# get the ML functions, TODO: make a library once it works/is in a stable state
pytrainpath = '/mpc/homes/fische_r/lib/pytrainseg'  # path to repo
cwd = os.getcwd()
# temporarily chdir into the repo so its modules are importable without installation
os.chdir(pytrainpath)
from filter_functions import image_filter
import training_functions as tfs
from training_functions import train_segmentation
from segmentation import segmentation
# record the short git hash of pytrainseg for provenance of the trained classifier
pytrain_git_sha = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode().strip()
os.chdir(cwd)

# paths -- host-specific configuration (storage layout and dask memory limit differ per machine)
host = socket.gethostname()
if host == 'mpc2959.psi.ch':
    gitpath = '/mpc/homes/fische_r/lib/co2ely-tomcat'  # path where the notebook sits to put githash into hdf5 as metadata
    toppath = '/mpc/homes/fische_r/NAS/DASCOELY'
    toppathSSD = '/mnt/SSD/fische_r/COELY'
    temppath = '/mnt/SSD/fische_r/tmp'
    temppath_2 = '/mpc/homes/fische_r/NAS/tmp'
    training_path = '/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
    # memlim = '840GB'
    memlim = '450GB'  # dask worker memory limit on this host
    # memlim = '920GB'
elif host == 'mpc2053.psi.ch':
    gitpath = '/mpc/homes/fische_r/lib/co2ely-tomcat'
    toppath = '/mpc/homes/fische_r/NAS/DASCOELY'
    toppathSSD = os.path.join(toppath, 'processing')
    temppath = '/mnt/SSD_2TB_nvme0n1/Robert/tmp/'
    temppath_2 = '/mpc/homes/fische_r/NAS/tmp'
    training_path = '/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
    memlim = '360GB'
else:
    print('host '+host+' currently not supported')

path_02_3p1D = os.path.join(toppathSSD, '02_registered_3p1D')  # h5 with registered data
path_02_4D = os.path.join(toppathSSD, '02_registered_4D')      # h5 with registered data as 4D array
path_04_4D = os.path.join(toppathSSD, '04_lowpass_filtered_4D')

# fetch githash of the analysis repo (stored as metadata for provenance)
cwd = os.getcwd()
os.chdir(gitpath)
git_sha = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode().strip()
githash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
os.chdir(cwd)
functionalities for interactive training¶
In [2]:
from ipywidgets import Image
from ipywidgets import ColorPicker, IntSlider, link, AppLayout, HBox
from ipycanvas import hold_canvas, MultiCanvas  # RoughCanvas, Canvas


def on_mouse_down(x, y):
    """Start a free-hand label stroke at (x, y) on the labeling canvas.

    Initializes the module-level drawing state: ``drawing`` flag, last
    ``position``, and the ``shape`` vertex list used for the final polygon.
    """
    global drawing
    global position
    global shape
    drawing = True
    position = (x, y)
    shape = [position]


def on_mouse_move(x, y):
    """Extend the current stroke while the mouse button is held down."""
    global drawing
    global position
    global shape
    if not drawing:
        return
    with hold_canvas():
        # draw the segment from the previous position to the cursor
        canvas.stroke_line(position[0], position[1], x, y)
        position = (x, y)
        shape.append(position)


def on_mouse_up(x, y):
    """Finish the stroke: close and fill the drawn polygon, reset state."""
    global drawing
    # FIX: was ``global positiondu`` (typo); ``position`` is the intended global
    global position
    global shape
    drawing = False
    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)
        canvas.fill_polygon(shape)
    shape = []


def display_feature(i, TS, feat_stack):
    """Return feature ``i`` of ``feat_stack`` rescaled to the 0..255 display range.

    ``TS`` is unused here (kept for call-site compatibility).
    NOTE(review): a constant feature (max == 0 after shifting) divides by zero.
    """
    # print('selected '+TS.feature_names[i])
    im = feat_stack[:, :, i]
    im8 = im - im.min()
    im8 = im8 / im8.max() * 255
    return im8
fire up dask, distributed Client currently not usable. No idea how not setting up dask affects the computation¶
In [3]:
# route dask's spill files to the fast local scratch disk
dask.config.config['temporary-directory'] = temppath


def boot_client(dashboard_address=':35000', memory_limit = memlim, n_workers=2):
    """Create a dask ``LocalCluster`` plus ``Client`` and print the dashboard URL.

    A big SSD as temp folder is a major advantage to allow spill to disk and
    still be efficient; a large dataset might crash with a too-small SSD or be
    slow on a normal HDD. Settings are optimised for mpc2959 — if nothing else
    is using RAM you can go almost to the limit.

    Returns
    -------
    (client, cluster) : the connected Client and its LocalCluster.
    """
    tempfolder = temppath
    # tempfolder = temppath_2
    # dask.config.config['distributed']['worker']['memory']['recent-to-old-time'] = '200000s'
    # here you have the option to use a virtual cluster or even slurm on ra (not attempted yet)
    # maybe fewer workers with more threads would make better use of shared memory
    # scheduler_port = 'tcp://129.129.188.222:8786'  # <-- if scheduler on mpc2959; on mpc2053 -> 'tcp://129.129.188.248:8786'
    worker_pool = LocalCluster(
        dashboard_address=dashboard_address,
        memory_limit=memory_limit,
        n_workers=n_workers,
    )
    client = Client(worker_pool)
    # client.amm.start()
    print('Dashboard at ' + client.dashboard_link)
    return client, worker_pool
In [4]:
client, cluster = boot_client()
Dashboard at http://127.0.0.1:35000/status
In [5]:
def reboot_client(client, dashboard_address=':35000', memory_limit = memlim, n_workers=2):
    """Shut down *client* (and its cluster) and return a fresh ``Client``.

    NOTE(review): the newly created ``LocalCluster`` is only reachable via the
    returned client (``client.cluster``); the module-level ``cluster`` name is
    NOT updated by this function — callers keep a stale reference.
    """
    client.shutdown()
    cluster = LocalCluster(dashboard_address=dashboard_address, memory_limit = memory_limit, n_workers=n_workers)
    client = Client(cluster)
    return client
In [6]:
# client.restart_workers(workers)
Data preparation¶
let dask load the data¶
In [22]:
filename = '04_'+sample+'_lowpass_filteres_4D.nc' imagepath = os.path.join(path_04_4D, filename)
In [23]:
file = h5py.File(imagepath)
In [24]:
da = dask.array.from_array(file['image_data'][a:b, c+GDL_crop:d, e:f], chunks= chunks)
In [25]:
da
Out[25]:
|
||||||||||||||||
get data into image filter class¶
In [26]:
# TODO: include this routine into pytrainseg IF = image_filter(sigmas = [0,1,3,6]) IF.data = da # IF.future = client.scatter(da) # shp = da.shape coords = {'x': np.arange(shp[0]), 'y': np.arange(shp[1]), 'z': np.arange(shp[2]), 'time': np.arange(shp[3])} IF.original_dataset = xr.Dataset({'tomo': (['x','y','z','time'], da)}, coords = coords )
prepare features¶
In [27]:
IF.prepare()
In [28]:
IF.stack_features()
In [30]:
IF.feature_stack
Out[30]:
|
||||||||||||||||
In [31]:
IF.make_xarray_nc()
Training¶
set up objects¶
In [32]:
# quick fix: copy the full feature set and cast the selection into TS.training_dict
# TODO: select for training without copying


def features_to_keep(feat_length, features_to_remove):
    """Boolean keep-mask of length ``feat_length`` with the given indices cleared."""
    mask = np.ones(feat_length, dtype=bool)
    mask[list(features_to_remove)] = False
    return mask


def ignore_feat_per_slice(entry, ids):
    """Apply keep-mask ``ids`` to the feature matrix of one training entry.

    ``entry`` is a ``(features, truth)`` pair; only the feature columns change.
    """
    feats, truth = entry[0], entry[1]
    return (feats[:, ids], truth)


def ignore_features(TS, features_to_remove = []):
    """Drop the given feature indices from every slice of ``TS.training_dict``.

    On first use the untouched originals are stashed in
    ``TS.training_dict_full`` / ``TS.combined_feature_names_full`` so a
    different selection can be applied later. Returns ``(TS, keep_mask)``.
    """
    if TS.training_dict_full is None:
        TS.combined_feature_names_full = TS.combined_feature_names.copy()
        TS.training_dict_full = TS.training_dict.copy()
    ids = features_to_keep(len(TS.combined_feature_names_full), features_to_remove)
    TS.training_dict = {
        key: ignore_feat_per_slice(entry, ids)
        for key, entry in TS.training_dict_full.items()
    }
    TS.combined_feature_names = np.array(TS.combined_feature_names_full)[ids]
    return TS, ids
In [33]:
training_path_sample = os.path.join(training_path, sample) if not os.path.exists(training_path_sample): os.mkdir(training_path_sample)
In [34]:
TS = train_segmentation(training_path=training_path_sample) TS.client = client IF.client = client TS.cluster = cluster IF.cluster = cluster TS.memlim = memlim TS.n_workers = 2
In [35]:
# TS.training_dict_full = {}
In [36]:
TS.import_lazy_feature_data(IF.result, IF.original_dataset)
In [37]:
IF.combined_feature_names = list(IF.feature_names) + list(IF.feature_names_time_independent)
In [38]:
TS.combined_feature_names = IF.combined_feature_names
In [39]:
TS.combined_feature_names
Out[39]:
['Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_1.0', 'Gaussian_4D_Blur_3.0', 'Gaussian_4D_Blur_6.0', 'Gaussian_4D_Blur_2.0', 'diff_of_gauss_4D_1.0_0.0', 'diff_of_gauss_4D_3.0_0.0', 'diff_of_gauss_4D_6.0_0.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_3.0_1.0', 'diff_of_gauss_4D_6.0_1.0', 'diff_of_gauss_4D_2.0_1.0', 'diff_of_gauss_4D_6.0_3.0', 'diff_of_gauss_4D_2.0_3.0', 'diff_of_gauss_4D_2.0_6.0', 'Gradient_sigma_1.0_0', 'Gradient_sigma_1.0_1', 'Gradient_sigma_1.0_2', 'Gradient_sigma_1.0_3', 'hessian_sigma_1.0_00', 'hessian_sigma_1.0_01', 'hessian_sigma_1.0_02', 'hessian_sigma_1.0_03', 'hessian_sigma_1.0_11', 'hessian_sigma_1.0_12', 'hessian_sigma_1.0_13', 'hessian_sigma_1.0_22', 'hessian_sigma_1.0_23', 'hessian_sigma_1.0_33', 'Gradient_sigma_3.0_0', 'Gradient_sigma_3.0_1', 'Gradient_sigma_3.0_2', 'Gradient_sigma_3.0_3', 'hessian_sigma_3.0_00', 'hessian_sigma_3.0_01', 'hessian_sigma_3.0_02', 'hessian_sigma_3.0_03', 'hessian_sigma_3.0_11', 'hessian_sigma_3.0_12', 'hessian_sigma_3.0_13', 'hessian_sigma_3.0_22', 'hessian_sigma_3.0_23', 'hessian_sigma_3.0_33', 'Gradient_sigma_6.0_0', 'Gradient_sigma_6.0_1', 'Gradient_sigma_6.0_2', 'Gradient_sigma_6.0_3', 'hessian_sigma_6.0_00', 'hessian_sigma_6.0_01', 'hessian_sigma_6.0_02', 'hessian_sigma_6.0_03', 'hessian_sigma_6.0_11', 'hessian_sigma_6.0_12', 'hessian_sigma_6.0_13', 'hessian_sigma_6.0_22', 'hessian_sigma_6.0_23', 'hessian_sigma_6.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1', 'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00', 'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03', 'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13', 'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33', 'Gaussian_time_0.0', 'Gaussian_time_1.0', 'Gaussian_time_3.0', 'Gaussian_time_6.0', 'Gaussian_time_2.0', 'diff_of_gauss_time_1.0_0.0', 'diff_of_gauss_time_3.0_0.0', 'diff_of_gauss_time_6.0_0.0', 'diff_of_gauss_time_2.0_0.0', 'diff_of_gauss_time_3.0_1.0', 
'diff_of_gauss_time_6.0_1.0', 'diff_of_gauss_time_2.0_1.0', 'diff_of_gauss_time_6.0_3.0', 'diff_of_gauss_time_2.0_3.0', 'diff_of_gauss_time_2.0_6.0', 'Gaussian_space_0.0', 'Gaussian_space_1.0', 'Gaussian_space_3.0', 'Gaussian_space_6.0', 'Gaussian_space_2.0', 'diff_of_gauss_space_1.0_0.0', 'diff_of_gauss_space_3.0_0.0', 'diff_of_gauss_space_6.0_0.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_3.0_1.0', 'diff_of_gauss_space_6.0_1.0', 'diff_of_gauss_space_2.0_1.0', 'diff_of_gauss_space_6.0_3.0', 'diff_of_gauss_space_2.0_3.0', 'diff_of_gauss_space_2.0_6.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0', 'full_temp_mean_', 'full_temp_min_', 'full_temp_min_Gauss_2.0']
interactive training¶
check for existing training sets¶
In [40]:
existing_sets = os.listdir(os.path.join(training_path_sample, 'label_images')) existing_sets.sort() existing_sets
Out[40]:
[]
In [41]:
training_path
Out[41]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
In [42]:
# you can load a compatible pickled training dict, check feature names # TS.training_dict = pickle.load(open(os.path.join(TS.training_path, pytrain_git_sha+'_training_dict.p'),'rb'))
In [43]:
TS.training_dict = {}
re-train with existing label sets. clear the training dictionary if necessary (training_dict)¶
In [44]:
# TS.train()
import training dict of other samples¶
(replace sample name and repeat for multiple samples), if necessary check features for overlap
In [45]:
oldsample = '4' oldgitsha = 'e42ad75' #'109a7ce3' #retrain at one point # if oldsample == '4': # training_dict_old = pickle.load(open(os.path.join(toppathSSD, '05_water_GDL_ML', '4', 'ec4415d_training_dict_without_loc_feat.p'), 'rb')) # else: training_dict_old = pickle.load(open(os.path.join(training_path, oldsample, oldgitsha+'_training_dict.p'),'rb')) oldfeatures = pickle.load(open(os.path.join(training_path, oldsample, oldgitsha+'_feature_names.p'),'rb')) # pickle.dump(TS.training_dict, open(os.path.join(TS.training_path, pytrain_git_sha+'_training_dict.p'),'wb')) # pickle.dump(TS.feature_names, open(os.path.join(TS.training_path, pytrain_git_sha+'_feature_names.p'),'wb')) for key in training_dict_old.keys(): TS.training_dict[oldsample+key] = training_dict_old[key]
In [46]:
len(TS.training_dict.keys())
Out[46]:
29
suggest a new training coordinate¶
currently retraining with a new feature stack is not properly implemented. Workaround: choose from the existing training sets and train with them (additional labeling optional)
In [49]:
TS.suggest_training_set() #this function does not work anymoere as intended, run it a few times until it gives a reasonable suggestion
You could try x = 680 and feature = 8 However, please sort it like the original xyztimetime_0feature
In [50]:
c1 = 'x' p1 = 530 c2 = 'time' p2 = 16
In [111]:
TS.load_training_set(c1, p1, c2, p2)
2023-10-16 16:35:37,076 - distributed.nanny - WARNING - Restarting worker 2023-10-16 16:35:37,084 - distributed.nanny - WARNING - Restarting worker
In [112]:
# client = reboot_client(client)
In [113]:
# TS.client = client # IF.client = client
In [114]:
# cluster = LocalCluster(dashboard_address=':35000', memory_limit = memlim, n_workers=2) #settings optimised for mpc2959, play around if needed, if you know nothing else is using RAM then you can almost go to the limit
In [115]:
if not len(client.cluster.workers)>1: client = reboot_client(client) TS.client = client IF.client = client
In [116]:
im8 = TS.current_im8
In [117]:
im8.shape
Out[117]:
(340, 1916)
In [118]:
# TS.get_slice_feat_stack()
In [119]:
# Select the 2D feature slice at the current training coordinates from the
# lazy feature stacks. ``c1``/``c2`` name the two fixed axes, ``p1``/``p2``
# their positions; the remaining two axes span the displayed image.
feat_data = TS.feat_data
[c1, p1, c2, p2] = TS.current_coordinates
newslice = True
if c1 == 'x' and c2 == 'time':
    feat_stack = feat_data['feature_stack'].sel(x = p1, time = p2)
    # the time-independent stack has a degenerate 'time_0' axis -> always index 0
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(x = p1, time_0 = 0)
elif c1 == 'x' and c2 == 'y':
    feat_stack = feat_data['feature_stack'].sel(x = p1, y = p2)#.data
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(x = p1, y = p2)
elif c1 == 'x' and c2 == 'z':
    feat_stack = feat_data['feature_stack'].sel(x = p1, z = p2)#.data
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(x = p1, z = p2)
elif c1 == 'y' and c2 == 'z':
    feat_stack = feat_data['feature_stack'].sel(y = p1, z = p2)#.data
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(y = p1, z = p2)
elif c1 == 'y' and c2 == 'time':
    feat_stack = feat_data['feature_stack'].sel(y = p1, time = p2)#.data
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(y = p1, time_0 = 0)
elif c1 == 'z' and c2 == 'time':
    feat_stack = feat_data['feature_stack'].sel(z = p1, time = p2)#.data
    feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(z = p1, time_0 = 0)
# NOTE(review): coordinate pairs outside the six cases above leave
# feat_stack / feat_stack_t_idp undefined and fail later with a NameError.
In [120]:
# Materialize the lazily-selected feature slice as an in-memory array.
if type(feat_stack) is not np.ndarray:
    fut = client.scatter(feat_stack)   # push the graph to the workers first
    fut = fut.result()
    fut = fut.compute()                # then pull the computed slice back
    feat_stack = fut
    # Workers tend to be in a bad state after this large pull (see warning
    # logs below) — restart them; fall back to a full client reboot.
    try:
        client.restart()
    except:  # NOTE(review): bare except — deliberately best-effort, but it hides the failure cause
        client = reboot_client(client)
        TS.client = client
        IF.client = client
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 70.32 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-10-16 16:48:40,746 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%) 2023-10-16 16:48:43,874 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:48:45,807 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:48:48,566 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:48:52,023 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:48:54,870 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:48:57,613 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:49:00,164 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:49:02,913 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-10-16 16:49:05,388 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-10-16 16:49:10,057 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-10-16 16:49:14,945 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-10-16 16:49:20,364 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-10-16 16:49:25,251 - distributed.utils_perf - WARNING - full 
garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:49:36,043 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:49:47,614 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:49:53,204 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:49:59,902 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:50:04,744 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:50:11,602 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:50:20,690 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:50:26,324 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:50:37,569 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-10-16 16:50:48,366 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%) 2023-10-16 16:51:27,134 - distributed.worker.memory - WARNING - gc.collect() took 4.416s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help. 
2023-10-16 16:51:58,619 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%) 2023-10-16 16:52:21,992 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%) 2023-10-16 16:52:48,301 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%) 2023-10-16 16:53:27,298 - distributed.worker.memory - WARNING - gc.collect() took 3.806s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help. 2023-10-16 16:53:40,754 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%) 2023-10-16 16:54:02,620 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%) 2023-10-16 16:54:52,449 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%) 2023-10-16 16:56:02,341 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%) 2023-10-16 16:56:53,988 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 16:57:42,095 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 16:58:11,898 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 16:58:50,714 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 16:59:13,292 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%) 2023-10-16 16:59:31,361 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%) 2023-10-16 16:59:54,830 - distributed.utils_perf - WARNING - full garbage 
collections took 11% CPU time recently (threshold: 10%) 2023-10-16 17:00:18,192 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 17:00:30,861 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-10-16 17:00:49,957 - distributed.nanny - WARNING - Restarting worker 2023-10-16 17:00:57,938 - distributed.nanny - WARNING - Restarting worker
reboot cluster if workers do not return¶
In [134]:
client.cluster.workers
Out[134]:
{0: <Nanny: tcp://127.0.0.1:44615, threads: 64>,
1: <Nanny: tcp://127.0.0.1:44151, threads: 64>}
In [ ]:
# Materialize the time-independent feature slice, same recipe as above:
# scatter, pull, then restart the workers to free their memory.
if type(feat_stack_t_idp) is not np.ndarray:
    fut = client.scatter(feat_stack_t_idp)
    fut = fut.result()
    fut = fut.compute()
    feat_stack_t_idp = fut
    try:
        client.restart()
    except:  # NOTE(review): bare except — best-effort recovery path
        client = reboot_client(client)
        TS.client = client
        IF.client = client
In [ ]:
client.cluster.workers
In [ ]:
feat_stack = np.concatenate([feat_stack, feat_stack_t_idp], axis = 2)
In [135]:
feat_stack.shape
Out[135]:
(340, 1916, 106)
In [ ]:
TS.current_feat_stack = feat_stack if type(TS.current_feat_stack) is not np.ndarray: TS.current_computed = False else: TS.current_computed = True
canvas for labeling¶
In [ ]:
# imm = mean[:,p1,:].copy() # im8m= imm-imm.min() # im8m = im8m/im8m.max()*255 # im8 = im8m
In [ ]:
for i in range(len(IF.combined_feature_names)): print(i, IF.combined_feature_names[i])
In [ ]:
i = 74 print(i, IF.combined_feature_names[i])
label the training image by setting the color and drawing on the canvas¶
- currently 4 options possible: #ff0000, #00ff00, #0000ff, #ffff00
- rerun cell to clear labeling
- tip: use the trackpad (panning?) zoom instead of ctrl+scrollwheel
In [178]:
alpha = 0.35 # zoom1 = (-500,-1) # zoom2 = (600,1400) # zoom1 = (0, -1) # zoom2 = (0, -1) # im8 = TS.current_im8 #trick: use gaussian_time_4_0 to label static phases () # im8 = display_feature(104, TS, feat_stack) # im8 = display_feature(0, TS) # print(IF.combined_feature_names[-20]) print('original shape: ',im8.shape) im8_display = im8.copy() #[zoom1[0]:zoom1[1], zoom2[0]:zoom2[1]] # print('diyplay shape : ',im8_display.shape,' at: ', (zoom1[0], zoom2[0])) resultim = TS.current_result.copy() resultim_display = resultim #[zoom1[0]:zoom1[1], zoom2[0]:zoom2[1]] width = im8_display.shape[1] height = im8_display.shape[0] Mcanvas = MultiCanvas(4, width=width, height=height) background = Mcanvas[0] resultdisplay = Mcanvas[2] truthdisplay = Mcanvas[1] canvas = Mcanvas[3] canvas.sync_image_data = True drawing = False position = None shape = [] image_data = np.stack((im8_display, im8_display, im8_display), axis=2) background.put_image_data(image_data, 0, 0) slidealpha = IntSlider(description="Result overlay", value=0.15) resultdisplay.global_alpha = alpha #slidealpha.value if np.any(resultim>0): result_data = np.stack(((resultim_display==0), (resultim_display==1),(resultim_display==2)), axis=2)*255 mask3 = resultim_display==3 result_data[mask3,0] = 255 result_data[mask3,1] = 255 else: result_data = np.stack((0*resultim, 0*resultim, 0*resultim), axis=2) resultdisplay.put_image_data(result_data, 0, 0) canvas.on_mouse_down(on_mouse_down) canvas.on_mouse_move(on_mouse_move) canvas.on_mouse_up(on_mouse_up) picker = ColorPicker(description="Color:", value="#ff0000") #red # picker = ColorPicker(description="Color:", value="#0000ff") #blue # picker = ColorPicker(description="Color:", value="#00ff00") #green link((picker, "value"), (canvas, "stroke_style")) link((picker, "value"), (canvas, "fill_style")) link((slidealpha, "value"), (resultdisplay, "global_alpha")) HBox((Mcanvas,picker)) # HBox((Mcanvas,)) #picker
original shape: (340, 1916)
Out[178]:
HBox(children=(MultiCanvas(height=340, width=1916), ColorPicker(value='#ff0000', description='Color:')))
In [170]:
tfs.plot_im_histogram(im8) # im8 = TS.current_im8 # im8 = tfs.adjust_image_contrast(im8,30,110)
inspect labels and training progress¶
In [171]:
fig, axes = plt.subplots(1,4, figsize=(20,10)) axes[0].imshow(TS.current_result, 'gray') axes[1].imshow(TS.current_im8, 'gray') # TS.current_diff_im = TS.current_im-TS.current_first_im # TS.current_diff_im = TS.current_diff_im/TS.current_diff_im.max()*255 # axes[2].imshow(-TS.current_diff_im)#,vmin=6e4) # axes[3].imshow(im8old, 'gray') # axes[3].imshow(TS.current_first_im, 'gray') axes[2].imshow(TS.current_truth) if TS.current_computed: axes[3].imshow(TS.current_feat_stack[:,:,-10]) else: axes[3].imshow(TS.current_result, 'gray') for ax in axes: ax.set_xticks([]) ax.set_yticks([])
update training set if labels are ok¶
In [172]:
# Convert the RGB strokes drawn on the canvas into integer class labels,
# merge them into the current truth image and persist it to disk.
label_set = canvas.get_image_data()
test = TS.current_truth.copy()
# pure red (red XOR green excludes yellow = red+green) -> class 1
test[np.bitwise_and(label_set[:,:,0]>0, np.bitwise_xor(label_set[:,:,0]>0, label_set[:,:,1]>0))] = 1
# green -> class 2 (yellow pixels get overwritten to 3 below)
test[label_set[:,:,1]>0] = 2
test[label_set[:,:,2]>0] = 4  #order of 4&3 flipped for legacy reasons (existing training labels)
# yellow (red AND green) -> class 3
test[np.bitwise_and(label_set[:,:,0]>0, label_set[:,:,1]>0)] = 3
TS.current_truth = test.copy()
imageio.imsave(TS.current_truthpath, TS.current_truth)
train!¶
In [175]:
TS.train_slice()
training and classifying
iterate labeling and training on current training slice until happy then repeat on different slice¶
check on training progress by plausible feature importance¶
In [180]:
plt.figure(figsize=(16,9)) plt.stem(IF.combined_feature_names, TS.clf.feature_importances_,'x') plt.xticks(rotation=90) plt.ylabel('importance') # plt.xticks(rotation = 60) # plt.yscale('log')
Out[180]:
Text(0, 0.5, 'importance')
when done, maybe save the classifier and optional the training dict (avoids recalculating the training sets, but might be large)¶
In [181]:
TS.pickle_classifier() pickle.dump(TS.training_dict, open(os.path.join(TS.training_path, pytrain_git_sha+'_training_dict.p'),'wb')) pickle.dump(TS.combined_feature_names, open(os.path.join(TS.training_path, pytrain_git_sha+'_feature_names.p'),'wb'))
In [182]:
TS.training_path
Out[182]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/4'
In [183]:
pytrain_git_sha
Out[183]:
'e42ad75'
Segmentation of full data set¶
Sometimes the dask execution stops randomly. Cancel the notebook, save intermediate results and restart the notebook to continue
In [40]:
from segmentation import segmentation
In [41]:
classifier_path=os.path.join(training_path, 'classifier.p') SM = segmentation(training_path = training_path, classifier_path=classifier_path)
In [42]:
SM.clf = pickle.load(open(os.path.join(training_path, 'classifier.p'), 'rb'))
In [43]:
clf = SM.clf clf.n_jobs = 64 if host == 'mpc2053.psi.ch': clf.n_jobs = 20
merge time-independent features¶
In [47]:
# dask.array.stack([TS.feat_data['feature_stack_time_independent'][:,:,:,0,:]]*da.shape[-1], axis=-2) feat_idp = TS.feat_data['feature_stack_time_independent'][:,:,:,0,:]
In [49]:
# feat = dask.array.concatenate([TS.feat_data['feature_stack'], test], axis=-1) feat = TS.feat_data['feature_stack']
In [51]:
feat
Out[51]:
<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.
*
*/
:root {
--xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));
--xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));
--xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));
--xr-border-color: var(--jp-border-color2, #e0e0e0);
--xr-disabled-color: var(--jp-layout-color3, #bdbdbd);
--xr-background-color: var(--jp-layout-color0, white);
--xr-background-color-row-even: var(--jp-layout-color1, white);
--xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);
}
html[theme=dark],
body[data-theme=dark],
body.vscode-dark {
--xr-font-color0: rgba(255, 255, 255, 1);
--xr-font-color2: rgba(255, 255, 255, 0.54);
--xr-font-color3: rgba(255, 255, 255, 0.38);
--xr-border-color: #1F1F1F;
--xr-disabled-color: #515151;
--xr-background-color: #111111;
--xr-background-color-row-even: #111111;
--xr-background-color-row-odd: #313131;
}
.xr-wrap {
display: block !important;
min-width: 300px;
max-width: 700px;
}
.xr-text-repr-fallback {
/* fallback to plain text repr when CSS is not injected (untrusted notebook) */
display: none;
}
.xr-header {
padding-top: 6px;
padding-bottom: 6px;
margin-bottom: 4px;
border-bottom: solid 1px var(--xr-border-color);
}
.xr-header > div,
.xr-header > ul {
display: inline;
margin-top: 0;
margin-bottom: 0;
}
.xr-obj-type,
.xr-array-name {
margin-left: 2px;
margin-right: 10px;
}
.xr-obj-type {
color: var(--xr-font-color2);
}
.xr-sections {
padding-left: 0 !important;
display: grid;
grid-template-columns: 150px auto auto 1fr 20px 20px;
}
.xr-section-item {
display: contents;
}
.xr-section-item input {
display: none;
}
.xr-section-item input + label {
color: var(--xr-disabled-color);
}
.xr-section-item input:enabled + label {
cursor: pointer;
color: var(--xr-font-color2);
}
.xr-section-item input:enabled + label:hover {
color: var(--xr-font-color0);
}
.xr-section-summary {
grid-column: 1;
color: var(--xr-font-color2);
font-weight: 500;
}
.xr-section-summary > span {
display: inline-block;
padding-left: 0.5em;
}
.xr-section-summary-in:disabled + label {
color: var(--xr-font-color2);
}
.xr-section-summary-in + label:before {
display: inline-block;
content: '►';
font-size: 11px;
width: 15px;
text-align: center;
}
.xr-section-summary-in:disabled + label:before {
color: var(--xr-disabled-color);
}
.xr-section-summary-in:checked + label:before {
content: '▼';
}
.xr-section-summary-in:checked + label > span {
display: none;
}
.xr-section-summary,
.xr-section-inline-details {
padding-top: 4px;
padding-bottom: 4px;
}
.xr-section-inline-details {
grid-column: 2 / -1;
}
.xr-section-details {
display: none;
grid-column: 1 / -1;
margin-bottom: 5px;
}
.xr-section-summary-in:checked ~ .xr-section-details {
display: contents;
}
.xr-array-wrap {
grid-column: 1 / -1;
display: grid;
grid-template-columns: 20px auto;
}
.xr-array-wrap > label {
grid-column: 1;
vertical-align: top;
}
.xr-preview {
color: var(--xr-font-color3);
}
.xr-array-preview,
.xr-array-data {
padding: 0 5px !important;
grid-column: 2;
}
.xr-array-data,
.xr-array-in:checked ~ .xr-array-preview {
display: none;
}
.xr-array-in:checked ~ .xr-array-data,
.xr-array-preview {
display: inline-block;
}
.xr-dim-list {
display: inline-block !important;
list-style: none;
padding: 0 !important;
margin: 0;
}
.xr-dim-list li {
display: inline-block;
padding: 0;
margin: 0;
}
.xr-dim-list:before {
content: '(';
}
.xr-dim-list:after {
content: ')';
}
.xr-dim-list li:not(:last-child):after {
content: ',';
padding-right: 5px;
}
.xr-has-index {
font-weight: bold;
}
.xr-var-list,
.xr-var-item {
display: contents;
}
.xr-var-item > div,
.xr-var-item label,
.xr-var-item > .xr-var-name span {
background-color: var(--xr-background-color-row-even);
margin-bottom: 0;
}
.xr-var-item > .xr-var-name:hover span {
padding-right: 5px;
}
.xr-var-list > li:nth-child(odd) > div,
.xr-var-list > li:nth-child(odd) > label,
.xr-var-list > li:nth-child(odd) > .xr-var-name span {
background-color: var(--xr-background-color-row-odd);
}
.xr-var-name {
grid-column: 1;
}
.xr-var-dims {
grid-column: 2;
}
.xr-var-dtype {
grid-column: 3;
text-align: right;
color: var(--xr-font-color2);
}
.xr-var-preview {
grid-column: 4;
}
.xr-index-preview {
grid-column: 2 / 5;
color: var(--xr-font-color2);
}
.xr-var-name,
.xr-var-dims,
.xr-var-dtype,
.xr-preview,
.xr-attrs dt {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
padding-right: 10px;
}
.xr-var-name:hover,
.xr-var-dims:hover,
.xr-var-dtype:hover,
.xr-attrs dt:hover {
overflow: visible;
width: auto;
z-index: 1;
}
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
display: none;
background-color: var(--xr-background-color) !important;
padding-bottom: 5px !important;
}
.xr-var-attrs-in:checked ~ .xr-var-attrs,
.xr-var-data-in:checked ~ .xr-var-data,
.xr-index-data-in:checked ~ .xr-index-data {
display: block;
}
.xr-var-data > table {
float: right;
}
.xr-var-name span,
.xr-var-data,
.xr-index-name div,
.xr-index-data,
.xr-attrs {
padding-left: 25px !important;
}
.xr-attrs,
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
grid-column: 1 / -1;
}
dl.xr-attrs {
padding: 0;
margin: 0;
display: grid;
grid-template-columns: 125px auto;
}
.xr-attrs dt,
.xr-attrs dd {
padding: 0;
margin: 0;
float: left;
padding-right: 10px;
width: auto;
}
.xr-attrs dt {
font-weight: normal;
grid-column: 1;
}
.xr-attrs dt:hover span {
display: inline-block;
background: var(--xr-background-color);
padding-right: 10px;
}
.xr-attrs dd {
grid-column: 2;
white-space: pre-wrap;
word-break: break-all;
}
.xr-icon-database,
.xr-icon-file-text2,
.xr-no-icon {
display: inline-block;
vertical-align: middle;
width: 1em;
height: 1.5em !important;
stroke-width: 0;
stroke: currentColor;
fill: currentColor;
}
</style>
<xarray.DataArray 'feature_stack' (x: 750, y: 340, z: 1916, time: 63,
feature: 103)>
dask.array<stack, shape=(750, 340, 1916, 63, 103), dtype=float64, chunksize=(36, 36, 36, 63, 1), chunktype=numpy.ndarray>
Coordinates:
* x (x) int64 0 1 2 3 4 5 6 7 8 ... 741 742 743 744 745 746 747 748 749
* y (y) int64 0 1 2 3 4 5 6 7 8 ... 331 332 333 334 335 336 337 338 339
* z (z) int64 0 1 2 3 4 5 6 7 ... 1909 1910 1911 1912 1913 1914 1915
* time (time) int64 0 1 2 3 4 5 6 7 8 9 ... 53 54 55 56 57 58 59 60 61 62
* feature (feature) <U27 'Gaussian_4D_Blur_0.0' ... 'diff_temp_min_Gauss_2.0'xarray.DataArray
'feature_stack'
- x: 750
- y: 340
- z: 1916
- time: 63
- feature: 103
- dask.array<chunksize=(36, 36, 36, 63, 1), meta=np.ndarray>
Array Chunk Bytes 23.07 TiB 22.43 MiB Shape (750, 340, 1916, 63, 103) (36, 36, 36, 63, 1) Dask graph 1308615 chunks in 540 graph layers Data type float64 numpy.ndarray - x(x)int640 1 2 3 4 5 ... 745 746 747 748 749
array([ 0, 1, 2, ..., 747, 748, 749])
- y(y)int640 1 2 3 4 5 ... 335 336 337 338 339
array([ 0, 1, 2, ..., 337, 338, 339])
- z(z)int640 1 2 3 4 ... 1912 1913 1914 1915
array([ 0, 1, 2, ..., 1913, 1914, 1915])
- time(time)int640 1 2 3 4 5 6 ... 57 58 59 60 61 62
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]) - feature(feature)<U27'Gaussian_4D_Blur_0.0' ... 'diff...
array(['Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_1.0', 'Gaussian_4D_Blur_3.0', 'Gaussian_4D_Blur_6.0', 'Gaussian_4D_Blur_2.0', 'diff_of_gauss_4D_1.0_0.0', 'diff_of_gauss_4D_3.0_0.0', 'diff_of_gauss_4D_6.0_0.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_3.0_1.0', 'diff_of_gauss_4D_6.0_1.0', 'diff_of_gauss_4D_2.0_1.0', 'diff_of_gauss_4D_6.0_3.0', 'diff_of_gauss_4D_2.0_3.0', 'diff_of_gauss_4D_2.0_6.0', 'Gradient_sigma_1.0_0', 'Gradient_sigma_1.0_1', 'Gradient_sigma_1.0_2', 'Gradient_sigma_1.0_3', 'hessian_sigma_1.0_00', 'hessian_sigma_1.0_01', 'hessian_sigma_1.0_02', 'hessian_sigma_1.0_03', 'hessian_sigma_1.0_11', 'hessian_sigma_1.0_12', 'hessian_sigma_1.0_13', 'hessian_sigma_1.0_22', 'hessian_sigma_1.0_23', 'hessian_sigma_1.0_33', 'Gradient_sigma_3.0_0', 'Gradient_sigma_3.0_1', 'Gradient_sigma_3.0_2', 'Gradient_sigma_3.0_3', 'hessian_sigma_3.0_00', 'hessian_sigma_3.0_01', 'hessian_sigma_3.0_02', 'hessian_sigma_3.0_03', 'hessian_sigma_3.0_11', 'hessian_sigma_3.0_12', 'hessian_sigma_3.0_13', 'hessian_sigma_3.0_22', 'hessian_sigma_3.0_23', 'hessian_sigma_3.0_33', 'Gradient_sigma_6.0_0', 'Gradient_sigma_6.0_1', 'Gradient_sigma_6.0_2', 'Gradient_sigma_6.0_3', 'hessian_sigma_6.0_00', 'hessian_sigma_6.0_01', 'hessian_sigma_6.0_02', 'hessian_sigma_6.0_03', 'hessian_sigma_6.0_11', 'hessian_sigma_6.0_12', 'hessian_sigma_6.0_13', 'hessian_sigma_6.0_22', 'hessian_sigma_6.0_23', 'hessian_sigma_6.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1', 'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00', 'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03', 'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13', 'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33', 'Gaussian_time_0.0', 'Gaussian_time_1.0', 'Gaussian_time_3.0', 'Gaussian_time_6.0', 'Gaussian_time_2.0', 'diff_of_gauss_time_1.0_0.0', 'diff_of_gauss_time_3.0_0.0', 'diff_of_gauss_time_6.0_0.0', 'diff_of_gauss_time_2.0_0.0', 
'diff_of_gauss_time_3.0_1.0', 'diff_of_gauss_time_6.0_1.0', 'diff_of_gauss_time_2.0_1.0', 'diff_of_gauss_time_6.0_3.0', 'diff_of_gauss_time_2.0_3.0', 'diff_of_gauss_time_2.0_6.0', 'Gaussian_space_0.0', 'Gaussian_space_1.0', 'Gaussian_space_3.0', 'Gaussian_space_6.0', 'Gaussian_space_2.0', 'diff_of_gauss_space_1.0_0.0', 'diff_of_gauss_space_3.0_0.0', 'diff_of_gauss_space_6.0_0.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_3.0_1.0', 'diff_of_gauss_space_6.0_1.0', 'diff_of_gauss_space_2.0_1.0', 'diff_of_gauss_space_6.0_3.0', 'diff_of_gauss_space_2.0_3.0', 'diff_of_gauss_space_2.0_6.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0'], dtype='<U27')
- xPandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 740, 741, 742, 743, 744, 745, 746, 747, 748, 749], dtype='int64', name='x', length=750)) - yPandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 330, 331, 332, 333, 334, 335, 336, 337, 338, 339], dtype='int64', name='y', length=340)) - zPandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915], dtype='int64', name='z', length=1916)) - timePandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], dtype='int64', name='time')) - featurePandasIndex
PandasIndex(Index(['Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_1.0', 'Gaussian_4D_Blur_3.0', 'Gaussian_4D_Blur_6.0', 'Gaussian_4D_Blur_2.0', 'diff_of_gauss_4D_1.0_0.0', 'diff_of_gauss_4D_3.0_0.0', 'diff_of_gauss_4D_6.0_0.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_3.0_1.0', ... 'diff_of_gauss_space_6.0_0.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_3.0_1.0', 'diff_of_gauss_space_6.0_1.0', 'diff_of_gauss_space_2.0_1.0', 'diff_of_gauss_space_6.0_3.0', 'diff_of_gauss_space_2.0_3.0', 'diff_of_gauss_space_2.0_6.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0'], dtype='object', name='feature', length=103))
check alignment of chunks and select suitable "super-chunk" shape to split calculation in parts, example below¶
Unfortunately the superchunk apparently has to fit twice into RAM — no idea what's going on there
In [52]:
# z extent (1916) vs. x extent (750): z is roughly 2.6x larger — presumably used
# to pick the super-chunk aspect ratio (dim2 = 2.6*dim1 below); TODO confirm
750*2.6
Out[52]:
1950.0
In [53]:
# quick sizing check (result: 7) — presumably estimating a loop/iteration count;
# NOTE(review): intent unclear from context, confirm
350/50
Out[53]:
7.0
In [54]:
# check length of loops to process entire dataset i = 14 j = i # j = 3 dim1 = 54#better use multiple of chunk size !? dim2 = int(2.6*dim1) feat[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:,:] #select all features and all time steps, but you are free in space; als large as possible and as small as necessary. limit is the available RAM to collect the dask result
Out[54]:
<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.
*
*/
:root {
--xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));
--xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));
--xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));
--xr-border-color: var(--jp-border-color2, #e0e0e0);
--xr-disabled-color: var(--jp-layout-color3, #bdbdbd);
--xr-background-color: var(--jp-layout-color0, white);
--xr-background-color-row-even: var(--jp-layout-color1, white);
--xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);
}
html[theme=dark],
body[data-theme=dark],
body.vscode-dark {
--xr-font-color0: rgba(255, 255, 255, 1);
--xr-font-color2: rgba(255, 255, 255, 0.54);
--xr-font-color3: rgba(255, 255, 255, 0.38);
--xr-border-color: #1F1F1F;
--xr-disabled-color: #515151;
--xr-background-color: #111111;
--xr-background-color-row-even: #111111;
--xr-background-color-row-odd: #313131;
}
.xr-wrap {
display: block !important;
min-width: 300px;
max-width: 700px;
}
.xr-text-repr-fallback {
/* fallback to plain text repr when CSS is not injected (untrusted notebook) */
display: none;
}
.xr-header {
padding-top: 6px;
padding-bottom: 6px;
margin-bottom: 4px;
border-bottom: solid 1px var(--xr-border-color);
}
.xr-header > div,
.xr-header > ul {
display: inline;
margin-top: 0;
margin-bottom: 0;
}
.xr-obj-type,
.xr-array-name {
margin-left: 2px;
margin-right: 10px;
}
.xr-obj-type {
color: var(--xr-font-color2);
}
.xr-sections {
padding-left: 0 !important;
display: grid;
grid-template-columns: 150px auto auto 1fr 20px 20px;
}
.xr-section-item {
display: contents;
}
.xr-section-item input {
display: none;
}
.xr-section-item input + label {
color: var(--xr-disabled-color);
}
.xr-section-item input:enabled + label {
cursor: pointer;
color: var(--xr-font-color2);
}
.xr-section-item input:enabled + label:hover {
color: var(--xr-font-color0);
}
.xr-section-summary {
grid-column: 1;
color: var(--xr-font-color2);
font-weight: 500;
}
.xr-section-summary > span {
display: inline-block;
padding-left: 0.5em;
}
.xr-section-summary-in:disabled + label {
color: var(--xr-font-color2);
}
.xr-section-summary-in + label:before {
display: inline-block;
content: '►';
font-size: 11px;
width: 15px;
text-align: center;
}
.xr-section-summary-in:disabled + label:before {
color: var(--xr-disabled-color);
}
.xr-section-summary-in:checked + label:before {
content: '▼';
}
.xr-section-summary-in:checked + label > span {
display: none;
}
.xr-section-summary,
.xr-section-inline-details {
padding-top: 4px;
padding-bottom: 4px;
}
.xr-section-inline-details {
grid-column: 2 / -1;
}
.xr-section-details {
display: none;
grid-column: 1 / -1;
margin-bottom: 5px;
}
.xr-section-summary-in:checked ~ .xr-section-details {
display: contents;
}
.xr-array-wrap {
grid-column: 1 / -1;
display: grid;
grid-template-columns: 20px auto;
}
.xr-array-wrap > label {
grid-column: 1;
vertical-align: top;
}
.xr-preview {
color: var(--xr-font-color3);
}
.xr-array-preview,
.xr-array-data {
padding: 0 5px !important;
grid-column: 2;
}
.xr-array-data,
.xr-array-in:checked ~ .xr-array-preview {
display: none;
}
.xr-array-in:checked ~ .xr-array-data,
.xr-array-preview {
display: inline-block;
}
.xr-dim-list {
display: inline-block !important;
list-style: none;
padding: 0 !important;
margin: 0;
}
.xr-dim-list li {
display: inline-block;
padding: 0;
margin: 0;
}
.xr-dim-list:before {
content: '(';
}
.xr-dim-list:after {
content: ')';
}
.xr-dim-list li:not(:last-child):after {
content: ',';
padding-right: 5px;
}
.xr-has-index {
font-weight: bold;
}
.xr-var-list,
.xr-var-item {
display: contents;
}
.xr-var-item > div,
.xr-var-item label,
.xr-var-item > .xr-var-name span {
background-color: var(--xr-background-color-row-even);
margin-bottom: 0;
}
.xr-var-item > .xr-var-name:hover span {
padding-right: 5px;
}
.xr-var-list > li:nth-child(odd) > div,
.xr-var-list > li:nth-child(odd) > label,
.xr-var-list > li:nth-child(odd) > .xr-var-name span {
background-color: var(--xr-background-color-row-odd);
}
.xr-var-name {
grid-column: 1;
}
.xr-var-dims {
grid-column: 2;
}
.xr-var-dtype {
grid-column: 3;
text-align: right;
color: var(--xr-font-color2);
}
.xr-var-preview {
grid-column: 4;
}
.xr-index-preview {
grid-column: 2 / 5;
color: var(--xr-font-color2);
}
.xr-var-name,
.xr-var-dims,
.xr-var-dtype,
.xr-preview,
.xr-attrs dt {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
padding-right: 10px;
}
.xr-var-name:hover,
.xr-var-dims:hover,
.xr-var-dtype:hover,
.xr-attrs dt:hover {
overflow: visible;
width: auto;
z-index: 1;
}
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
display: none;
background-color: var(--xr-background-color) !important;
padding-bottom: 5px !important;
}
.xr-var-attrs-in:checked ~ .xr-var-attrs,
.xr-var-data-in:checked ~ .xr-var-data,
.xr-index-data-in:checked ~ .xr-index-data {
display: block;
}
.xr-var-data > table {
float: right;
}
.xr-var-name span,
.xr-var-data,
.xr-index-name div,
.xr-index-data,
.xr-attrs {
padding-left: 25px !important;
}
.xr-attrs,
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
grid-column: 1 / -1;
}
dl.xr-attrs {
padding: 0;
margin: 0;
display: grid;
grid-template-columns: 125px auto;
}
.xr-attrs dt,
.xr-attrs dd {
padding: 0;
margin: 0;
float: left;
padding-right: 10px;
width: auto;
}
.xr-attrs dt {
font-weight: normal;
grid-column: 1;
}
.xr-attrs dt:hover span {
display: inline-block;
background: var(--xr-background-color);
padding-right: 10px;
}
.xr-attrs dd {
grid-column: 2;
white-space: pre-wrap;
word-break: break-all;
}
.xr-icon-database,
.xr-icon-file-text2,
.xr-no-icon {
display: inline-block;
vertical-align: middle;
width: 1em;
height: 1.5em !important;
stroke-width: 0;
stroke: currentColor;
fill: currentColor;
}
</style>
<xarray.DataArray 'feature_stack' (x: 0, y: 340, z: 0, time: 63, feature: 103)> dask.array<getitem, shape=(0, 340, 0, 63, 103), dtype=float64, chunksize=(0, 36, 0, 63, 1), chunktype=numpy.ndarray> Coordinates: * x (x) int64 * y (y) int64 0 1 2 3 4 5 6 7 8 ... 331 332 333 334 335 336 337 338 339 * z (z) int64 * time (time) int64 0 1 2 3 4 5 6 7 8 9 ... 53 54 55 56 57 58 59 60 61 62 * feature (feature) <U27 'Gaussian_4D_Blur_0.0' ... 'diff_temp_min_Gauss_2.0'
xarray.DataArray
'feature_stack'
- x: 0
- y: 340
- z: 0
- time: 63
- feature: 103
- dask.array<chunksize=(0, 36, 0, 63, 1), meta=np.ndarray>
Array Chunk Bytes 0 B 0 B Shape (0, 340, 0, 63, 103) (0, 36, 0, 63, 1) Dask graph 1133 chunks in 541 graph layers Data type float64 numpy.ndarray - x(x)int64
array([], dtype=int64)
- y(y)int640 1 2 3 4 5 ... 335 336 337 338 339
array([ 0, 1, 2, ..., 337, 338, 339])
- z(z)int64
array([], dtype=int64)
- time(time)int640 1 2 3 4 5 6 ... 57 58 59 60 61 62
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]) - feature(feature)<U27'Gaussian_4D_Blur_0.0' ... 'diff...
array(['Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_1.0', 'Gaussian_4D_Blur_3.0', 'Gaussian_4D_Blur_6.0', 'Gaussian_4D_Blur_2.0', 'diff_of_gauss_4D_1.0_0.0', 'diff_of_gauss_4D_3.0_0.0', 'diff_of_gauss_4D_6.0_0.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_3.0_1.0', 'diff_of_gauss_4D_6.0_1.0', 'diff_of_gauss_4D_2.0_1.0', 'diff_of_gauss_4D_6.0_3.0', 'diff_of_gauss_4D_2.0_3.0', 'diff_of_gauss_4D_2.0_6.0', 'Gradient_sigma_1.0_0', 'Gradient_sigma_1.0_1', 'Gradient_sigma_1.0_2', 'Gradient_sigma_1.0_3', 'hessian_sigma_1.0_00', 'hessian_sigma_1.0_01', 'hessian_sigma_1.0_02', 'hessian_sigma_1.0_03', 'hessian_sigma_1.0_11', 'hessian_sigma_1.0_12', 'hessian_sigma_1.0_13', 'hessian_sigma_1.0_22', 'hessian_sigma_1.0_23', 'hessian_sigma_1.0_33', 'Gradient_sigma_3.0_0', 'Gradient_sigma_3.0_1', 'Gradient_sigma_3.0_2', 'Gradient_sigma_3.0_3', 'hessian_sigma_3.0_00', 'hessian_sigma_3.0_01', 'hessian_sigma_3.0_02', 'hessian_sigma_3.0_03', 'hessian_sigma_3.0_11', 'hessian_sigma_3.0_12', 'hessian_sigma_3.0_13', 'hessian_sigma_3.0_22', 'hessian_sigma_3.0_23', 'hessian_sigma_3.0_33', 'Gradient_sigma_6.0_0', 'Gradient_sigma_6.0_1', 'Gradient_sigma_6.0_2', 'Gradient_sigma_6.0_3', 'hessian_sigma_6.0_00', 'hessian_sigma_6.0_01', 'hessian_sigma_6.0_02', 'hessian_sigma_6.0_03', 'hessian_sigma_6.0_11', 'hessian_sigma_6.0_12', 'hessian_sigma_6.0_13', 'hessian_sigma_6.0_22', 'hessian_sigma_6.0_23', 'hessian_sigma_6.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1', 'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00', 'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03', 'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13', 'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33', 'Gaussian_time_0.0', 'Gaussian_time_1.0', 'Gaussian_time_3.0', 'Gaussian_time_6.0', 'Gaussian_time_2.0', 'diff_of_gauss_time_1.0_0.0', 'diff_of_gauss_time_3.0_0.0', 'diff_of_gauss_time_6.0_0.0', 'diff_of_gauss_time_2.0_0.0', 
'diff_of_gauss_time_3.0_1.0', 'diff_of_gauss_time_6.0_1.0', 'diff_of_gauss_time_2.0_1.0', 'diff_of_gauss_time_6.0_3.0', 'diff_of_gauss_time_2.0_3.0', 'diff_of_gauss_time_2.0_6.0', 'Gaussian_space_0.0', 'Gaussian_space_1.0', 'Gaussian_space_3.0', 'Gaussian_space_6.0', 'Gaussian_space_2.0', 'diff_of_gauss_space_1.0_0.0', 'diff_of_gauss_space_3.0_0.0', 'diff_of_gauss_space_6.0_0.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_3.0_1.0', 'diff_of_gauss_space_6.0_1.0', 'diff_of_gauss_space_2.0_1.0', 'diff_of_gauss_space_6.0_3.0', 'diff_of_gauss_space_2.0_3.0', 'diff_of_gauss_space_2.0_6.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0'], dtype='<U27')
- xPandasIndex
PandasIndex(Index([], dtype='int64', name='x'))
- yPandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 330, 331, 332, 333, 334, 335, 336, 337, 338, 339], dtype='int64', name='y', length=340)) - zPandasIndex
PandasIndex(Index([], dtype='int64', name='z'))
- timePandasIndex
PandasIndex(Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], dtype='int64', name='time')) - featurePandasIndex
PandasIndex(Index(['Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_1.0', 'Gaussian_4D_Blur_3.0', 'Gaussian_4D_Blur_6.0', 'Gaussian_4D_Blur_2.0', 'diff_of_gauss_4D_1.0_0.0', 'diff_of_gauss_4D_3.0_0.0', 'diff_of_gauss_4D_6.0_0.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_3.0_1.0', ... 'diff_of_gauss_space_6.0_0.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_3.0_1.0', 'diff_of_gauss_space_6.0_1.0', 'diff_of_gauss_space_2.0_1.0', 'diff_of_gauss_space_6.0_3.0', 'diff_of_gauss_space_2.0_3.0', 'diff_of_gauss_space_2.0_6.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0'], dtype='object', name='feature', length=103))
In [55]:
# segs = np.zeros(feat.shape[:4], dtype=np.uint8)
# segs = pickle.load(open(os.path.join(training_path,'segs.p'), 'rb'))
# Load the (partially filled) segmentation result array from the local SSD temp path.
# Fixed: the original line had mismatched parentheses — 'rb' ended up as a second
# argument to pickle.load instead of open, and there was an extra ')', making the
# line a SyntaxError. Also use a context manager so the file handle is closed.
with open(os.path.join(temppath, 'segs.p'), 'rb') as f:
    segs = pickle.load(f)
In [56]:
# del segs
In [57]:
# import gc client.run(gc.collect) gc.collect()
Out[57]:
50
In [58]:
import ctypes


def trim_memory() -> int:
    """Ask glibc to return free heap pages to the operating system.

    Invokes ``malloc_trim(0)`` from ``libc.so.6``; per the glibc docs it
    returns 1 when memory was actually released and 0 otherwise.  Used as a
    workaround for dask workers/processes accumulating unmanaged memory.
    """
    glibc = ctypes.CDLL("libc.so.6")
    return glibc.malloc_trim(0)
In [59]:
# Release freed heap pages of this (notebook) process back to the OS.
trim_memory()
Out[59]:
1
In [ ]:
# Tile-wise classification of the full 4D feature stack with the trained classifier.
# The (x, z) plane is split into `limit` x `limit` tiles of size dim1 x dim2; each
# tile's features are materialised from dask, merged with the time-independent
# features, classified by `clf`, and written into the preallocated `segs` array.
limit = 14
# TODO: Workaround: write intermediates to disk and restart dask client/scheduler/workers to get rid of unmanaged memory
# possible solution: do data handling on mpc2053 and calculations on mpc2959 -> leaves some RAM --> gc.collect() not necessary anymore
# apparently starts to become critical after 12 iterations
# from i=7, j=10 already done
# TODO write i and j to file to track progress even when closing jupyter
for i in range(4,limit):
    # pickle.dump(segs, open(os.path.join(training_path,'segs_temp.p'), 'wb'))
    print(str(i+1)+'/'+str(limit))  # progress indicator: current row of tiles
    # plt.figure()
    # plt.imshow(segs[:,100,:, 50])
    # plt.savefig(os.path.join(training_path, sample+'_'+str(i)+'_progress.png'))
    # client.run(gc.collect)
    # client.run(trim_memory)
    # trim_memory()
    start = 0
    if i == 4:
        # resume point: tiles j < 7 of row i=4 were already computed in a previous run
        start = 7
    for j in range(start,limit):
        # aggressively reclaim memory locally and on all dask workers before each tile
        gc.collect()
        client.run(gc.collect)
        client.run(trim_memory)
        trim_memory()
        print(j)
        #with joblib.parallel_backend('dask'):
        # part = feat[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:,:] #.persist() #compute() may blow up the memory ?! https://stackoverflow.com/questions/73770527/dask-compute-uses-twice-the-expected-memory
        # slice the current tile out of the time-dependent and time-independent feature stacks
        part = feat[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:,:]
        part_idp = feat_idp[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:]
        if 0 in part.shape:
            # tile starts past the array edge -> empty slice, nothing to segment
            print('hit the edge (one dimension 0), ignore')
            continue
        if type(part) is not np.ndarray:
            # dask-backed tile: pull it into local memory, then restart the
            # workers to shed their unmanaged memory before the next pull
            fut = client.scatter(part)
            fut = fut.result()
            fut = fut.compute()
            part = fut
            try:
                client.restart()
            except:  # NOTE(review): bare except also swallows KeyboardInterrupt — consider `except Exception`
                client = reboot_client(client)
        # if j == 0:
        print('now the time independent part')
        if type(part_idp) is not np.ndarray:
            # same materialise-then-restart dance for the time-independent features
            fut = client.scatter(part_idp)
            fut = fut.result()
            fut = fut.compute()
            part_idp = fut
            try:
                client.restart()
            except:  # NOTE(review): bare except — see note above
                client = reboot_client(client)
        # feat = dask.array.concatenate([TS.feat_data['feature_stack'], test], axis=-1)
        # dask.array.stack([TS.feat_data['feature_stack_time_independent'][:,:,:,0,:]]*da.shape[-1], axis=-2)
        # replicate the time-independent features along the time axis
        # (assumes da.shape[-1] is the number of time steps — TODO confirm)
        part_idp = np.stack([part_idp]*da.shape[-1], axis=-2)
        part = np.concatenate([part, part_idp], axis = -1)
        del part_idp
        # part = part.compute()
        shp = part.shape
        num_feat = part.shape[-1]
        # flatten to (samples, features) for the classifier
        part = part.reshape(-1,num_feat)
        # predict in two halves to roughly halve the peak RAM used by clf.predict
        psplit = int(part.shape[0]/2)
        # print('create part 1')
        part1 = part[:psplit,:]
        # print('create part 2')
        part2 = part[psplit:,:]
        # print('segmenting 1')
        # with joblib.parallel_backend('dask'):
        seg1 = clf.predict(part1).astype(np.uint8)
        del part1
        # print('segmenting 2')
        # with joblib.parallel_backend('dask'):
        seg2 = clf.predict(part2).astype(np.uint8)
        # print('wrap results')
        del part2
        del part
        # gc.collect()
        seg = np.concatenate([seg1,seg2])
        print(seg.shape)
        del seg1
        del seg2
        # this step needs a lot of RAM ?! apparently not
        # plt.dump(seg, open(os.path.join(training_path,'seg_'+str(i)+'_'+str(j)+'.p'), 'wb'))
        # put segs together when all calculated
        # restore the tile's 4D shape (x, y, z, time) before writing it back
        seg = seg.reshape(shp[:4])
        # write the tile into the result array; edge tiles use open-ended slices
        # NOTE(review): numpy clips over-long slice stops, so the first case alone
        # would likely suffice — kept as written
        # not sure if this switch cases are necessary
        if i < limit-1 and j < limit-1:
            segs[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:] = seg
        elif not i < limit-1 and j < limit-1:
            segs[i*dim1:,:,j*dim2:(j+1)*dim2,:] = seg
        elif not j < limit-1 and i < limit-1:
            segs[i*dim1:(i+1)*dim1,:,j*dim2:,:] = seg
        else:
            segs[i*dim1:,:,j*dim2:,:] = seg
        del seg
5/14 7
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.53 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 08:55:05,396 - distributed.nanny - WARNING - Restarting worker 2023-11-06 08:55:11,790 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:00:09,419 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:00:10,414 - distributed.nanny - WARNING - Restarting worker
(161935200,) 8
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 10.41 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 09:17:54,795 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:18:01,541 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:22:40,612 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:22:41,556 - distributed.nanny - WARNING - Restarting worker
(161935200,) 9
2023-11-06 09:35:24,797 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-11-06 09:35:37,336 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 09:35:53,042 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) 2023-11-06 09:36:15,587 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 10.41 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 09:40:50,631 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:40:57,532 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:45:39,235 - distributed.nanny - WARNING - Restarting worker 2023-11-06 09:45:40,160 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 09:52:34,605 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
10
2023-11-06 09:57:30,109 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-11-06 09:57:44,375 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-11-06 09:57:51,901 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%) 2023-11-06 09:58:01,737 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-11-06 09:58:14,450 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%) 2023-11-06 09:58:30,600 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 09:58:53,584 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.53 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 10:03:51,077 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:03:57,405 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:08:53,941 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:08:55,895 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 10:16:32,179 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%)
11
2023-11-06 10:21:25,774 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:21:41,001 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:21:48,413 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:21:58,320 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:22:11,508 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) 2023-11-06 10:22:27,356 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) 2023-11-06 10:22:50,498 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.53 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 10:27:41,827 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:27:47,989 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:32:42,578 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:32:44,713 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 10:40:31,128 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%)
12
2023-11-06 10:45:21,833 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:45:36,824 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:45:44,516 - distributed.utils_perf - WARNING - full garbage collections took 20% CPU time recently (threshold: 10%) 2023-11-06 10:45:54,646 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) 2023-11-06 10:46:07,675 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) 2023-11-06 10:46:23,604 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) 2023-11-06 10:46:46,848 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.52 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 10:51:45,842 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:51:52,256 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:56:44,566 - distributed.nanny - WARNING - Restarting worker 2023-11-06 10:56:46,646 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 11:04:38,909 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%)
13
2023-11-06 11:09:29,488 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) 2023-11-06 11:09:44,479 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) 2023-11-06 11:09:52,132 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) 2023-11-06 11:10:02,367 - distributed.utils_perf - WARNING - full garbage collections took 22% CPU time recently (threshold: 10%) 2023-11-06 11:10:15,430 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 11:10:31,395 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 11:10:54,757 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 11:12:19,552 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%) 2023-11-06 11:12:26,519 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%) 2023-11-06 11:14:20,734 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:14:26,757 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:19:06,466 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:19:07,442 - distributed.nanny - WARNING - Restarting worker
(111041280,) 6/14
2023-11-06 11:23:51,742 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%)
0
2023-11-06 11:28:49,177 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:29:04,508 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:29:12,164 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:29:22,661 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:29:35,824 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:29:52,164 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:30:16,610 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:34:32,664 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:34:38,824 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:39:23,902 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:39:24,886 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 11:46:05,926 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%)
1
2023-11-06 11:51:04,631 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:51:20,234 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:51:28,624 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:51:39,127 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:51:52,647 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:52:09,457 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) 2023-11-06 11:52:35,209 - distributed.utils_perf - WARNING - full garbage collections took 24% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.51 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 11:57:32,692 - distributed.nanny - WARNING - Restarting worker 2023-11-06 11:57:38,815 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:02:21,639 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:02:22,660 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 12:09:08,272 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%)
2
2023-11-06 12:14:05,789 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:14:21,448 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:14:29,838 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:14:40,359 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:14:53,881 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:15:10,645 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) 2023-11-06 12:15:36,436 - distributed.utils_perf - WARNING - full garbage collections took 23% CPU time recently (threshold: 10%) /mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 12:20:27,864 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:20:34,100 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:25:21,387 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:25:22,326 - distributed.nanny - WARNING - Restarting worker
(161935200,)
2023-11-06 12:32:07,763 - distributed.utils_perf - WARNING - full garbage collections took 21% CPU time recently (threshold: 10%)
3
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 12:43:37,152 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:43:43,362 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:48:25,187 - distributed.nanny - WARNING - Restarting worker 2023-11-06 12:48:26,174 - distributed.nanny - WARNING - Restarting worker
(161935200,) 4
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 13:06:53,039 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:06:59,293 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:11:42,160 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:11:43,184 - distributed.nanny - WARNING - Restarting worker
(161935200,) 5
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 13:29:37,626 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:29:44,513 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:34:26,730 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:34:27,716 - distributed.nanny - WARNING - Restarting worker
(161935200,) 6
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 13:52:41,781 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:52:47,984 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:57:29,691 - distributed.nanny - WARNING - Restarting worker 2023-11-06 13:57:30,680 - distributed.nanny - WARNING - Restarting worker
(161935200,) 7
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 14:15:53,186 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:15:59,358 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:20:43,287 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:20:44,273 - distributed.nanny - WARNING - Restarting worker
(161935200,) 8
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 10.42 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 14:38:47,636 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:38:53,826 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:43:36,553 - distributed.nanny - WARNING - Restarting worker 2023-11-06 14:43:37,502 - distributed.nanny - WARNING - Restarting worker
(161935200,) 9
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 10.42 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 15:01:50,088 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:01:56,220 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:06:46,574 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:06:47,510 - distributed.nanny - WARNING - Restarting worker
(161935200,) 10
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 15:24:58,452 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:25:05,685 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:29:46,644 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:29:47,627 - distributed.nanny - WARNING - Restarting worker
(161935200,) 11
/mpc/homes/fische_r/miniconda3/lib/python3.11/site-packages/distributed/client.py:3149: UserWarning: Sending large graph of size 12.54 MiB. This may cause some slowdown. Consider scattering data ahead of time and using futures. warnings.warn( 2023-11-06 15:48:13,771 - distributed.nanny - WARNING - Restarting worker 2023-11-06 15:48:20,001 - distributed.nanny - WARNING - Restarting worker
In [61]:
# Inspect which nanny/worker processes are still alive after the loop crashed.
client.cluster.workers
Out[61]:
{0: <Nanny: tcp://127.0.0.1:34233, threads: 64>,
1: <Nanny: tcp://127.0.0.1:37813, threads: 64>}
In [67]:
# Row-tile index reached when the loop stopped (for manual resume bookkeeping).
i
Out[67]:
4
In [68]:
# Column-tile index reached when the loop stopped (for manual resume bookkeeping).
j
Out[68]:
7
In [69]:
# Quick visual sanity check: one x-z slice (y=50) of the segmentation at timestep 20.
plt.imshow(segs[:,50,:, 20])
Out[69]:
<matplotlib.image.AxesImage at 0x7f54f1dd3050>
In [ ]:
# Load the other partial segmentation (stored under training_path) for merging.
# Use a context manager so the file handle is closed deterministically instead
# of leaking the handle returned by a bare open() inside pickle.load().
with open(os.path.join(training_path, 'segs.p'), 'rb') as f:
    segs1 = pickle.load(f)
In [ ]:
# Same sanity-check slice for the second partial segmentation before merging.
plt.imshow(segs1[:,50,:, 20])
In [ ]:
# Merge the two partial results — assumes the tiles each run computed are
# disjoint (the other array is zero there), so addition combines them; TODO confirm.
segs = segs+segs1
In [70]:
# Persist the merged segmentation to the fast local SSD scratch space.
# Fixed: the write handle from the bare open() was never closed, so buffered
# bytes could be lost and the pickle truncated — use a context manager.
# pickle.dump(segs, open(os.path.join(training_path,'segs.p'), 'wb'))
with open(os.path.join(temppath, 'segs.p'), 'wb') as f:
    pickle.dump(segs, f)
save result to disk¶
In [67]:
# TODO: include metadata in segmented nc and shp = segs.shape segdata = xr.Dataset({'segmented': (['x','y','z','timestep'], segs), 't_utc': ('timestep', t_utc), 'time': ('timestep', time)}, coords = {'x': np.arange(shp[0]), 'y': np.arange(shp[1]), 'z': np.arange(shp[2]), 'timestep': np.arange(shp[3]), 'feature': TS.combined_feature_names} ) segdata.attrs = data.attrs.copy() segdata.attrs['05_ML_cropping'] = [a,b,c,d,e,f] segdata.attrs['pytrain_git'] = pytrain_git_sha segdata.attrs['05_coely_gitsha'] = git_sha segdata.attrs['GDL_crop'] = GDL_crop
In [68]:
# Output path for the segmentation NetCDF.
# NOTE(review): no separator between `sample` and 'water_segmentation.nc'
# (e.g. yields '1water_segmentation.nc') — intentional? Not changed here
# since files may already exist under this name.
segpath = os.path.join(training_path_sample, sample+'water_segmentation.nc')
In [69]:
# Write the dataset (labels + timing + metadata) to disk as NetCDF.
segdata.to_netcdf(segpath)
In [75]:
# load intermediate result seg_data = xr.load_dataset(segpath)
In [37]:
# Extract the raw numpy label array from the reloaded dataset.
segs = seg_data['segmented'].data
In [61]:
# Display the dataset repr to verify dimensions, coords and attributes.
segdata
Out[61]:
<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.
*
*/
:root {
--xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));
--xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));
--xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));
--xr-border-color: var(--jp-border-color2, #e0e0e0);
--xr-disabled-color: var(--jp-layout-color3, #bdbdbd);
--xr-background-color: var(--jp-layout-color0, white);
--xr-background-color-row-even: var(--jp-layout-color1, white);
--xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);
}
html[theme=dark],
body[data-theme=dark],
body.vscode-dark {
--xr-font-color0: rgba(255, 255, 255, 1);
--xr-font-color2: rgba(255, 255, 255, 0.54);
--xr-font-color3: rgba(255, 255, 255, 0.38);
--xr-border-color: #1F1F1F;
--xr-disabled-color: #515151;
--xr-background-color: #111111;
--xr-background-color-row-even: #111111;
--xr-background-color-row-odd: #313131;
}
.xr-wrap {
display: block !important;
min-width: 300px;
max-width: 700px;
}
.xr-text-repr-fallback {
/* fallback to plain text repr when CSS is not injected (untrusted notebook) */
display: none;
}
.xr-header {
padding-top: 6px;
padding-bottom: 6px;
margin-bottom: 4px;
border-bottom: solid 1px var(--xr-border-color);
}
.xr-header > div,
.xr-header > ul {
display: inline;
margin-top: 0;
margin-bottom: 0;
}
.xr-obj-type,
.xr-array-name {
margin-left: 2px;
margin-right: 10px;
}
.xr-obj-type {
color: var(--xr-font-color2);
}
.xr-sections {
padding-left: 0 !important;
display: grid;
grid-template-columns: 150px auto auto 1fr 20px 20px;
}
.xr-section-item {
display: contents;
}
.xr-section-item input {
display: none;
}
.xr-section-item input + label {
color: var(--xr-disabled-color);
}
.xr-section-item input:enabled + label {
cursor: pointer;
color: var(--xr-font-color2);
}
.xr-section-item input:enabled + label:hover {
color: var(--xr-font-color0);
}
.xr-section-summary {
grid-column: 1;
color: var(--xr-font-color2);
font-weight: 500;
}
.xr-section-summary > span {
display: inline-block;
padding-left: 0.5em;
}
.xr-section-summary-in:disabled + label {
color: var(--xr-font-color2);
}
.xr-section-summary-in + label:before {
display: inline-block;
content: '►';
font-size: 11px;
width: 15px;
text-align: center;
}
.xr-section-summary-in:disabled + label:before {
color: var(--xr-disabled-color);
}
.xr-section-summary-in:checked + label:before {
content: '▼';
}
.xr-section-summary-in:checked + label > span {
display: none;
}
.xr-section-summary,
.xr-section-inline-details {
padding-top: 4px;
padding-bottom: 4px;
}
.xr-section-inline-details {
grid-column: 2 / -1;
}
.xr-section-details {
display: none;
grid-column: 1 / -1;
margin-bottom: 5px;
}
.xr-section-summary-in:checked ~ .xr-section-details {
display: contents;
}
.xr-array-wrap {
grid-column: 1 / -1;
display: grid;
grid-template-columns: 20px auto;
}
.xr-array-wrap > label {
grid-column: 1;
vertical-align: top;
}
.xr-preview {
color: var(--xr-font-color3);
}
.xr-array-preview,
.xr-array-data {
padding: 0 5px !important;
grid-column: 2;
}
.xr-array-data,
.xr-array-in:checked ~ .xr-array-preview {
display: none;
}
.xr-array-in:checked ~ .xr-array-data,
.xr-array-preview {
display: inline-block;
}
.xr-dim-list {
display: inline-block !important;
list-style: none;
padding: 0 !important;
margin: 0;
}
.xr-dim-list li {
display: inline-block;
padding: 0;
margin: 0;
}
.xr-dim-list:before {
content: '(';
}
.xr-dim-list:after {
content: ')';
}
.xr-dim-list li:not(:last-child):after {
content: ',';
padding-right: 5px;
}
.xr-has-index {
font-weight: bold;
}
.xr-var-list,
.xr-var-item {
display: contents;
}
.xr-var-item > div,
.xr-var-item label,
.xr-var-item > .xr-var-name span {
background-color: var(--xr-background-color-row-even);
margin-bottom: 0;
}
.xr-var-item > .xr-var-name:hover span {
padding-right: 5px;
}
.xr-var-list > li:nth-child(odd) > div,
.xr-var-list > li:nth-child(odd) > label,
.xr-var-list > li:nth-child(odd) > .xr-var-name span {
background-color: var(--xr-background-color-row-odd);
}
.xr-var-name {
grid-column: 1;
}
.xr-var-dims {
grid-column: 2;
}
.xr-var-dtype {
grid-column: 3;
text-align: right;
color: var(--xr-font-color2);
}
.xr-var-preview {
grid-column: 4;
}
.xr-index-preview {
grid-column: 2 / 5;
color: var(--xr-font-color2);
}
.xr-var-name,
.xr-var-dims,
.xr-var-dtype,
.xr-preview,
.xr-attrs dt {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
padding-right: 10px;
}
.xr-var-name:hover,
.xr-var-dims:hover,
.xr-var-dtype:hover,
.xr-attrs dt:hover {
overflow: visible;
width: auto;
z-index: 1;
}
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
display: none;
background-color: var(--xr-background-color) !important;
padding-bottom: 5px !important;
}
.xr-var-attrs-in:checked ~ .xr-var-attrs,
.xr-var-data-in:checked ~ .xr-var-data,
.xr-index-data-in:checked ~ .xr-index-data {
display: block;
}
.xr-var-data > table {
float: right;
}
.xr-var-name span,
.xr-var-data,
.xr-index-name div,
.xr-index-data,
.xr-attrs {
padding-left: 25px !important;
}
.xr-attrs,
.xr-var-attrs,
.xr-var-data,
.xr-index-data {
grid-column: 1 / -1;
}
dl.xr-attrs {
padding: 0;
margin: 0;
display: grid;
grid-template-columns: 125px auto;
}
.xr-attrs dt,
.xr-attrs dd {
padding: 0;
margin: 0;
float: left;
padding-right: 10px;
width: auto;
}
.xr-attrs dt {
font-weight: normal;
grid-column: 1;
}
.xr-attrs dt:hover span {
display: inline-block;
background: var(--xr-background-color);
padding-right: 10px;
}
.xr-attrs dd {
grid-column: 2;
white-space: pre-wrap;
word-break: break-all;
}
.xr-icon-database,
.xr-icon-file-text2,
.xr-no-icon {
display: inline-block;
vertical-align: middle;
width: 1em;
height: 1.5em !important;
stroke-width: 0;
stroke: currentColor;
fill: currentColor;
}
</style>
<xarray.Dataset>
Dimensions: (x: 750, y: 340, z: 1916, timestep: 31, feature: 69)
Coordinates:
* x (x) int64 0 1 2 3 4 5 6 7 8 ... 742 743 744 745 746 747 748 749
* y (y) int64 0 1 2 3 4 5 6 7 8 ... 332 333 334 335 336 337 338 339
* z (z) int64 0 1 2 3 4 5 6 7 ... 1909 1910 1911 1912 1913 1914 1915
* timestep (timestep) int64 0 1 2 3 4 5 6 7 8 ... 22 23 24 25 26 27 28 29 30
* feature (feature) <U27 'diff_to_first_' ... 'full_temp_min_Gauss_2.0'
Data variables:
segmented (x, y, z, timestep) uint8 0 0 0 2 2 2 0 0 0 ... 0 0 0 0 0 0 0 0 0
t_utc (timestep) float64 1.667e+09 1.667e+09 ... 1.667e+09 1.667e+09
time (timestep) float64 7.036 6.218 66.42 126.6 ... 759.3 819.5 879.7
Attributes: (12/20)
name: 1
voxel size: 2.75 um
voxel: 2.75e-06
post rotation cropping coordinates [a:b,c:d,e:f]: [ 120 1062 320 972 ...
rotation angle 1: -22
rotation angle 2: -22
... ...
05_crop_githash: 761a49fa1ea416a28344b9...
git_sha_registration: 612e92b
githash_registration: 612e92b28895745f6b422c...
05_ML_cropping: [120, 870, 10, 350, 50...
pytrain_git: e5b2d83
05_coely_gitsha: 1c033cexarray.Dataset
- x: 750
- y: 340
- z: 1916
- timestep: 31
- feature: 69
- x(x)int640 1 2 3 4 5 ... 745 746 747 748 749
array([ 0, 1, 2, ..., 747, 748, 749])
- y(y)int640 1 2 3 4 5 ... 335 336 337 338 339
array([ 0, 1, 2, ..., 337, 338, 339])
- z(z)int640 1 2 3 4 ... 1912 1913 1914 1915
array([ 0, 1, 2, ..., 1913, 1914, 1915])
- timestep(timestep)int640 1 2 3 4 5 6 ... 25 26 27 28 29 30
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) - feature(feature)<U27'diff_to_first_' ... 'full_temp_...
array(['diff_to_first_', 'diff_to_last_', 'Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_2.0', 'Gaussian_4D_Blur_4.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_4.0_0.0', 'diff_of_gauss_4D_4.0_2.0', 'Gradient_sigma_0.0_0', 'Gradient_sigma_0.0_1', 'Gradient_sigma_0.0_2', 'Gradient_sigma_0.0_3', 'hessian_sigma_0.0_00', 'hessian_sigma_0.0_01', 'hessian_sigma_0.0_02', 'hessian_sigma_0.0_03', 'hessian_sigma_0.0_11', 'hessian_sigma_0.0_12', 'hessian_sigma_0.0_13', 'hessian_sigma_0.0_22', 'hessian_sigma_0.0_23', 'hessian_sigma_0.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1', 'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00', 'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03', 'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13', 'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33', 'Gradient_sigma_4.0_0', 'Gradient_sigma_4.0_1', 'Gradient_sigma_4.0_2', 'Gradient_sigma_4.0_3', 'hessian_sigma_4.0_00', 'hessian_sigma_4.0_01', 'hessian_sigma_4.0_02', 'hessian_sigma_4.0_03', 'hessian_sigma_4.0_11', 'hessian_sigma_4.0_12', 'hessian_sigma_4.0_13', 'hessian_sigma_4.0_22', 'hessian_sigma_4.0_23', 'hessian_sigma_4.0_33', 'Gaussian_time_0.0', 'Gaussian_time_2.0', 'Gaussian_time_4.0', 'diff_of_gauss_time_2.0_0.0', 'diff_of_gauss_time_4.0_0.0', 'diff_of_gauss_time_4.0_2.0', 'Gaussian_space_0.0', 'Gaussian_space_2.0', 'Gaussian_space_4.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_4.0_0.0', 'diff_of_gauss_space_4.0_2.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0', 'first_', 'last_', 'full_temp_mean_', 'full_temp_min_', 'full_temp_min_Gauss_2.0'], dtype='<U27')
- segmented(x, y, z, timestep)uint80 0 0 2 2 2 0 0 ... 0 0 0 0 0 0 0 0
array([[[[0, 0, 0, ..., 1, 1, 1], [0, 0, 0, ..., 1, 1, 1], [0, 0, 0, ..., 1, 1, 1], ..., [2, 2, 2, ..., 1, 1, 1], [2, 2, 2, ..., 1, 1, 1], [2, 2, 0, ..., 1, 1, 1]], [[0, 0, 0, ..., 1, 1, 1], [0, 0, 0, ..., 1, 1, 1], [0, 0, 0, ..., 1, 1, 1], ..., [2, 2, 2, ..., 1, 1, 1], [2, 2, 2, ..., 1, 1, 1], [2, 2, 0, ..., 1, 1, 1]], [[2, 2, 0, ..., 1, 1, 1], [2, 2, 0, ..., 1, 1, 1], [2, 2, 0, ..., 1, 1, 1], ..., ... ..., [0, 0, 0, ..., 2, 0, 0], [0, 0, 0, ..., 2, 0, 0], [0, 0, 0, ..., 0, 0, 0]], [[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], [[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 2, 0, ..., 0, 0, 0], [2, 0, 0, ..., 0, 0, 0]]]], dtype=uint8) - t_utc(timestep)float641.667e+09 1.667e+09 ... 1.667e+09
array([1.66722001e+09, 1.66722040e+09, 1.66722046e+09, 1.66722052e+09, 1.66722058e+09, 1.66722064e+09, 1.66722070e+09, 1.66722076e+09, 1.66722082e+09, 1.66722088e+09, 1.66722095e+09, 1.66722101e+09, 1.66722107e+09, 1.66722113e+09, 1.66722119e+09, 1.66722125e+09, 1.66722142e+09, 1.66722148e+09, 1.66722154e+09, 1.66722160e+09, 1.66722166e+09, 1.66722172e+09, 1.66722178e+09, 1.66722184e+09, 1.66722190e+09, 1.66722196e+09, 1.66722202e+09, 1.66722209e+09, 1.66722215e+09, 1.66722221e+09, 1.66722227e+09]) - time(timestep)float647.036 6.218 66.42 ... 819.5 879.7
array([ 7.0355083, 6.2176226, 66.424704 , 126.6437905, 186.8748772, 247.0959636, 307.2980496, 367.5261356, 427.75722 , 487.9903053, 548.1683904, 608.3904758, 668.6175608, 728.8406441, 789.0717297, 849.2918141, 36.6449636, 96.8650426, 157.0971262, 217.3022106, 277.5122954, 337.7453791, 397.9744636, 458.1985478, 518.4136323, 578.6417168, 638.8698003, 699.1018843, 759.3249683, 819.5310523, 879.7401363])
- xPandasIndex
PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 740, 741, 742, 743, 744, 745, 746, 747, 748, 749], dtype='int64', name='x', length=750)) - yPandasIndex
PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 330, 331, 332, 333, 334, 335, 336, 337, 338, 339], dtype='int64', name='y', length=340)) - zPandasIndex
PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915], dtype='int64', name='z', length=1916)) - timestepPandasIndex
PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], dtype='int64', name='timestep')) - featurePandasIndex
PandasIndex(Index(['diff_to_first_', 'diff_to_last_', 'Gaussian_4D_Blur_0.0', 'Gaussian_4D_Blur_2.0', 'Gaussian_4D_Blur_4.0', 'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_4.0_0.0', 'diff_of_gauss_4D_4.0_2.0', 'Gradient_sigma_0.0_0', 'Gradient_sigma_0.0_1', 'Gradient_sigma_0.0_2', 'Gradient_sigma_0.0_3', 'hessian_sigma_0.0_00', 'hessian_sigma_0.0_01', 'hessian_sigma_0.0_02', 'hessian_sigma_0.0_03', 'hessian_sigma_0.0_11', 'hessian_sigma_0.0_12', 'hessian_sigma_0.0_13', 'hessian_sigma_0.0_22', 'hessian_sigma_0.0_23', 'hessian_sigma_0.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1', 'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00', 'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03', 'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13', 'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33', 'Gradient_sigma_4.0_0', 'Gradient_sigma_4.0_1', 'Gradient_sigma_4.0_2', 'Gradient_sigma_4.0_3', 'hessian_sigma_4.0_00', 'hessian_sigma_4.0_01', 'hessian_sigma_4.0_02', 'hessian_sigma_4.0_03', 'hessian_sigma_4.0_11', 'hessian_sigma_4.0_12', 'hessian_sigma_4.0_13', 'hessian_sigma_4.0_22', 'hessian_sigma_4.0_23', 'hessian_sigma_4.0_33', 'Gaussian_time_0.0', 'Gaussian_time_2.0', 'Gaussian_time_4.0', 'diff_of_gauss_time_2.0_0.0', 'diff_of_gauss_time_4.0_0.0', 'diff_of_gauss_time_4.0_2.0', 'Gaussian_space_0.0', 'Gaussian_space_2.0', 'Gaussian_space_4.0', 'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_4.0_0.0', 'diff_of_gauss_space_4.0_2.0', 'diff_to_min_', 'diff_temp_min_Gauss_2.0', 'first_', 'last_', 'full_temp_mean_', 'full_temp_min_', 'full_temp_min_Gauss_2.0'], dtype='object', name='feature'))
- name :
- 1
- voxel size :
- 2.75 um
- voxel :
- 2.75e-06
- post rotation cropping coordinates [a:b,c:d,e:f] :
- [ 120 1062 320 972 0 2016]
- rotation angle 1 :
- -22
- rotation angle 2 :
- -22
- git_sha_rotation :
- 2e4dec6
- githash_rotation :
- 2e4dec6a6358ec0b6c95cac935d9648b716189a9
- image_data_names :
- <scan>_iamge_data_<time_step>, e.g. 02_image_data_00 is the first time step of scan 02
- 03_crop_git_sha :
- 761a49f
- 03_crop_githash :
- 761a49fa1ea416a28344b9f2e885a2e83f94996c
- 04_crop_git_sha :
- 761a49f
- 04_crop_githash :
- 761a49fa1ea416a28344b9f2e885a2e83f94996c
- 05_crop_git_sha :
- 761a49f
- 05_crop_githash :
- 761a49fa1ea416a28344b9f2e885a2e83f94996c
- git_sha_registration :
- 612e92b
- githash_registration :
- 612e92b28895745f6b422c556fb8aa7ad376baeb
- 05_ML_cropping :
- [120, 870, 10, 350, 50, -50]
- pytrain_git :
e5b2d83- 05_coely_gitsha :
- 1c033ce
In [ ]: