Files
pyTrainSeg/example_notebook.ipynb
T
2023-09-21 14:05:03 +02:00

1.2 MiB

Segment water in GDL to quantify influence of liquid water on electrochemistry

This is an example notebook showing how I segmented the water within the cathode GDL. Segmentation of the membrane cavities works analogously, with different cropping and label sets. There was an update of dask that broke the code. The feature-stack setup and training functionalities have been fixed, but the full-volume segmentation has yet to be reviewed. However, there is no reason why this should not work after some adjustments — probably even much better than before.

In [1]:
# modules
import os
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import dask
import dask.array
from scipy import ndimage
from skimage import filters, feature, io
from skimage.morphology import disk,ball
import sys
from itertools import combinations_with_replacement
import pickle
import imageio
import json
from dask.distributed import Client, LocalCluster
import socket
import subprocess
import gc
import h5py

# get the ML functions, TODO: make a library once it works/is in a stable state
# NOTE: the os.chdir dance works for imports because notebooks keep the cwd
# ('') on sys.path; the short git sha is recorded so pickled training dicts
# can be matched to the pytrainseg version that produced them.
pytrainpath = '/mpc/homes/fische_r/lib/pytrainseg' #path of git repo
cwd = os.getcwd()
os.chdir(pytrainpath)
from filter_functions import image_filter
import training_functions as tfs
from training_functions import train_segmentation
from segmentation import segmentation
pytrain_git_sha = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode().strip()
os.chdir(cwd)

#paths
# Host-specific configuration: all data/scratch/training locations are
# machine-local mounts, so only known hosts are supported.
host = socket.gethostname()
if host != 'mpc2959.psi.ch':
    print('host '+host+' currently not supported')
else:
    gitpath = '/mpc/homes/fische_r/lib/co2ely-tomcat'
    toppath = '/mpc/homes/fische_r/NAS/DASCOELY'
    toppathSSD = '/mnt/SSD/fische_r/COELY'
    temppath = '/mnt/SSD/fische_r/tmp'
    temppath_2 = '/mpc/homes/fische_r/NAS/tmp'
    training_path = '/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
    memlim = '420GB' #1/2 of available RAM for 2 workers

# fetch githash
# Record the analysis repo's git hashes (short and full) for provenance.
# Passing cwd= to check_output avoids chdir'ing the notebook process: the
# original chdir/chdir-back left the cwd changed if the git call raised.
cwd = os.getcwd()  # kept for backward compatibility with later cells
git_sha = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], cwd=gitpath).decode().strip()
githash = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=gitpath).decode().strip()

functionalities for interactive training

In [2]:
from ipywidgets import Image
from ipywidgets import ColorPicker, IntSlider, link, AppLayout, HBox
from ipycanvas import  hold_canvas,  MultiCanvas #RoughCanvas,Canvas,

def on_mouse_down(x, y):
    """Start a new polygon: remember the anchor point and enter drawing mode."""
    global drawing, position, shape
    drawing = True
    position = (x, y)
    shape = [position]

def on_mouse_move(x, y):
    """While drawing, draw a segment from the last position to (x, y) and
    append the new point to the polygon outline."""
    global drawing, position, shape
    if drawing:
        with hold_canvas():
            canvas.stroke_line(position[0], position[1], x, y)
            position = (x, y)
        shape.append(position)

def on_mouse_up(x, y):
    """Finish the polygon: draw the closing segment, fill it, leave drawing mode."""
    global drawing
    global position  # fixed: was misspelled 'positiondu' (harmless for reads, but wrong)
    global shape
    drawing = False
    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)
        canvas.fill_polygon(shape)
    shape = []
    
def display_feature(i, TS):
    """Return feature slice i scaled to the 0..255 range for canvas display.

    Parameters:
        i  -- index into TS.feature_names / the last axis of the stack
        TS -- training object exposing feature_names and current_feat_stack;
              assumes current_feat_stack is (row, col, feature) -- TODO confirm

    Returns the rescaled 2D image (float array in [0, 255]).
    """
    print('selected '+TS.feature_names[i])
    im = TS.current_feat_stack[:, :, i]
    im8 = im - im.min()
    peak = im8.max()
    if peak > 0:  # guard: a constant feature image would otherwise produce NaNs
        im8 = im8 / peak * 255
    return im8

fire up dask

In [3]:
# Keep dask scratch space on the fast SSD: spill-to-disk is a major
# advantage, but a large dataset might crash with a too-small SSD or be
# slow on a normal HDD. (Alternative: temppath_2 on the NAS.)
dask.config.config['temporary-directory'] = temppath

def boot_client():
    """Start a LocalCluster tuned for mpc2959 and return (client, cluster).

    Settings optimised for mpc2959 -- if nothing else is using RAM you can
    go almost to the limit. Fewer workers with more threads might make
    better use of shared memory. A virtual cluster or slurm scheduler
    address could be used instead (not attempted yet).
    """
    # removed: dead local 'tempfolder' that was assigned but never used
    cluster = LocalCluster(dashboard_address=':35000', memory_limit=memlim, n_workers=2)
    client = Client(cluster)
    print('Dashboard at '+client.dashboard_link)
    return client, cluster
2023-05-16 09:28:37,202 - distributed.diskutils - INFO - Found stale lock file and directory '/mnt/SSD/fische_r/tmp/dask-worker-space/worker-aqx4i1f3', purging
/mpc/homes/fische_r/miniconda3/lib/python3.10/contextlib.py:142: UserWarning: Creating scratch directories is taking a surprisingly long time. (3.69s) This is often due to running workers on a network file system. Consider specifying a local-directory to point workers to write scratch data to a local disk.
  next(self.gen)
Dashboard at http://127.0.0.1:35000/status
In [ ]:
def reboot_client(client, cluster):
    """Replace a (possibly wedged) client with a fresh one on the same cluster.

    Fix: the original called client.shutdown(), which also tears down the
    scheduler and workers, so Client(cluster) afterwards could not reconnect.
    client.close() disconnects only this client and leaves the cluster alive.
    """
    client.close()
    client = Client(cluster)
    return client
In [ ]:
client, cluster = boot_client()

Test ROI selection

Data preparation

create dask array

In [12]:
# Select the sample and lazily open its registered 3+1D dataset with xarray.
# NOTE(review): path_02_4D is defined in an earlier (not shown) cell.
sample = '4'
file = '02_'+sample+'_registered_3p1D.nc'
imagepath = os.path.join(path_02_4D, file)
data = xr.open_dataset(imagepath)
# collect the per-timestep image variables (names carry 'imag' at chars 3:7)
images = sorted(im for im in data.keys() if im[3:7] == 'imag')
In [13]:
# Acquisition timestamps: absolute UTC and relative experiment time.
# NOTE(review): the name 'time' shadows the stdlib module if imported later.
t_utc = data['t_utc'].data
time = data['time'].data
In [14]:
# define some cropping dimensions


# here with obtained cropping data from file
jsonpath = os.path.join(training_path, 'cathode_cropping_and_aligning.json')
with open(jsonpath, 'r') as fp:  # fixed: file handle was left open
    crop_dict = json.load(fp)

# (a, b) crop the first axis, (c, d) the second, (e, f) the third
(a,b,c,d) = crop_dict[sample]
(e,f) = (50,-50)

#corrections to crop coordinates (disabled)
# f = e+1750
# e = e+100
define border to crop to GDL
In [16]:
# Border (second axis) to crop away before the GDL starts; 0 keeps everything.
GDL_crop = 0
fig, ax = plt.subplots(figsize=(16, 9))
ax.imshow(test_im1[600, GDL_crop:, :], vmin=10000, vmax=15000, cmap='gray')
Out[16]:
<matplotlib.image.AxesImage at 0x7f5fa9009b70>
No description has been provided for this image
In [17]:
# Shape of the cropped volume; append the number of time steps as a 4th axis
# and compare against the test slice as a sanity check.
shp = data[images[4]][a:b, c:d, e:f].shape
# shp = data[images[4]][a:b, c+GDL_crop:d, e:f].shape
shp = shp + (len(images),)
print(shp, test_im1[:,GDL_crop:, :].shape)
(750, 340, 1916, 71) (750, 340, 1916)
In [23]:
data.close()

let dask load the data

needs to be a 4D-hdf5, for example created with xarray as .nc

In [ ]:
# Path of the 4D file (needs to be a 4D hdf5, e.g. created with xarray as .nc).
filename = '02_'+sample+'_registered_4D.nc'
imagepath = os.path.join(path_02_4D, filename)
In [ ]:
file = h5py.File(imagepath)
In [ ]:
da = dask.array.from_array(file['image_data'][a:b, c+GDL_crop:d, e:f], chunks= chunks) #do the cropping within the dask array creation, otherwise dask only adds a graph layer and might crash ?!

get data into image filter class

In [24]:
# TODO: include this routine into pytrainseg

# sigmas define the Gaussian-blur kernel widths used for the feature filters
IF = image_filter(sigmas=[2, 4, 6])
IF.data = da
shp = da.shape
dims = ['x', 'y', 'z', 'time']
coords = {dim: np.arange(n) for dim, n in zip(dims, shp)}
IF.original_dataset = xr.Dataset({'tomo': (dims, da)}, coords=coords)
# IF.data = IF.data.rechunk('auto')

prepare features

In [25]:
IF.prepare()
In [26]:
IF.stack_features()
In [28]:
IF.feature_stack #shows the feature stack
Out[28]:
Array Chunk
Bytes 16.15 TiB 25.27 MiB
Shape (750, 340, 1916, 71, 64) (36, 36, 36, 71, 1)
Dask graph 903168 chunks in 465 graph layers
Data type float64 numpy.ndarray
340 750 64 71 1916
In [29]:
IF.make_xarray_nc()

Training

set up objects

In [31]:
# Per-sample folder for label images and training dicts; create if missing.
training_path_sample = os.path.join(training_path, sample)
os.makedirs(training_path_sample, exist_ok=True)  # race-free vs. exists()+mkdir, also creates parents
In [33]:
# Instantiate the trainer and wire the distributed client/cluster into both
# the trainer and the filter object so their computations run on the cluster.
TS = train_segmentation(training_path=training_path_sample)
TS.client = client
IF.client = client
TS.cluster = cluster
IF.cluster = cluster
In [34]:
TS.import_lazy_feature_data(IF.result, IF.original_dataset)
In [35]:
IF.combined_feature_names = list(IF.feature_names) + list(IF.feature_names_time_independent)
In [36]:
TS.combined_feature_names = IF.combined_feature_names

interactive training

check for existing training sets

these training sets only work if the cropping has not changed

In [38]:
# Already-recorded label images for this sample, sorted for a stable overview.
# These training sets only work if the cropping has not changed.
existing_sets = sorted(os.listdir(os.path.join(training_path_sample, 'label_images')))
existing_sets
Out[38]:
['label_image_x_157_time_5_.tif',
 'label_image_x_172_time_17_.tif',
 'label_image_x_172_time_2_.tif',
 'label_image_x_252_time_0_.tif',
 'label_image_x_264_time_29_.tif',
 'label_image_x_270_time_50_.tif',
 'label_image_x_307_time_25_.tif',
 'label_image_x_368_time_45_.tif',
 'label_image_x_456_time_0_.tif',
 'label_image_x_503_time_15_.tif',
 'label_image_x_531_time_50_.tif',
 'label_image_y_241_time_0_.tif',
 'label_image_y_245_time_8_.tif',
 'label_image_y_250_time_23_.tif',
 'label_image_y_250_time_33_.tif',
 'label_image_y_255_time_20_.tif',
 'label_image_y_255_time_40_.tif',
 'label_image_y_255_time_64_.tif',
 'label_image_y_270_time_47_.tif',
 'label_image_y_280_time_57_.tif',
 'label_image_y_71_time_27_.tif',
 'label_image_z_307_time_0_.tif',
 'label_image_z_307_time_10_.tif',
 'label_image_z_307_time_25_.tif',
 'label_image_z_320_time_10_.tif',
 'label_image_z_320_time_30_.tif',
 'label_image_z_320_time_45_.tif',
 'label_image_z_468_time_11_.tif',
 'label_image_z_546_time_0_.tif',
 'label_image_z_564_time_0_.tif']
In [39]:
training_path
Out[39]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
In [40]:
# you can load a compatible pickled training dict, check feature names
# NOTE: pickle.load executes arbitrary code -- only load dicts you created.
with open(os.path.join(TS.training_path, pytrain_git_sha+'_training_dict.p'), 'rb') as fp:  # fixed: handle was leaked
    TS.training_dict = pickle.load(fp)
In [ ]:
TS.training_dict

re-train with existing label sets. clear the training dictionary if necessary (training_dict)

In [41]:
TS.train()
training with existing label images
label_image_z_546_time_0_.tif
label_image_y_255_time_40_.tif already done
label_image_z_307_time_25_.tif already done
label_image_x_157_time_5_.tif already done
label_image_y_71_time_27_.tif already done
label_image_x_270_time_50_.tif already done
label_image_x_456_time_0_.tif already done
label_image_z_320_time_10_.tif already done
label_image_x_307_time_25_.tif
label_image_z_564_time_0_.tif already done
label_image_z_320_time_30_.tif already done
label_image_x_264_time_29_.tif already done
label_image_y_280_time_57_.tif already done
label_image_y_250_time_23_.tif already done
label_image_x_172_time_17_.tif
2023-05-16 09:45:00,371 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 09:45:14,621 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%)
2023-05-16 09:45:35,188 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 09:46:04,314 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-05-16 09:49:22,459 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 09:50:54,694 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 09:52:31,246 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 09:53:45,077 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 09:54:17,198 - distributed.worker - ERROR - Timed out during handshake while connecting to tcp://127.0.0.1:40027 after 30 s
Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/core.py", line 328, in connect
    handshake = await asyncio.wait_for(comm.read(), time_left())
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
    raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/utils.py", line 741, in wrapper
    return await func(*args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/worker.py", line 1566, in close
    await r.close_gracefully(reason=reason)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1224, in send_recv_from_rpc
    comm = await self.pool.connect(self.addr)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1468, in connect
    return await connect_attempt
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1389, in _connect
    comm = await connect(
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/core.py", line 333, in connect
    raise OSError(
OSError: Timed out during handshake while connecting to tcp://127.0.0.1:40027 after 30 s

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/utils.py", line 741, in wrapper
    return await func(*args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/worker.py", line 1518, in close
    await self.finished()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/locks.py", line 214, in wait
    await fut
asyncio.exceptions.CancelledError
2023-05-16 09:54:47,223 - distributed.worker - CRITICAL - Error trying close worker in response to broken internal state. Forcibly exiting worker NOW
Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/utils.py", line 741, in wrapper
    return await func(*args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/worker.py", line 1518, in close
    await self.finished()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/locks.py", line 214, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/worker.py", line 230, in _force_close
    await asyncio.wait_for(
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
    raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError
2023-05-16 09:54:47,590 - distributed.nanny - WARNING - Restarting worker
2023-05-16 09:55:24,784 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-05-16 09:55:27,257 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:30,068 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:33,322 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:36,585 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:39,938 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:43,304 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:46,738 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:51,772 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:55:56,134 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:00,644 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:06,342 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:11,303 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:17,016 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:22,878 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:29,303 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:36,880 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:43,433 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:50,846 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:56:59,606 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:07,562 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:16,888 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:26,843 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:37,066 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:47,981 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:57:59,347 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:58:11,759 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:58:25,270 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-05-16 09:58:40,363 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-05-16 09:58:54,131 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-05-16 09:59:11,643 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-05-16 09:59:30,384 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-05-16 09:59:49,345 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-05-16 10:00:11,604 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-05-16 10:00:50,460 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-05-16 10:01:38,505 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 10:02:19,995 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-05-16 10:03:03,981 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-05-16 10:03:44,879 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-05-16 10:04:16,568 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-05-16 10:05:04,492 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-05-16 10:06:05,845 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:06:44,248 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:07:48,311 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:08:33,565 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:09:17,781 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:10:02,956 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:10:56,186 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:11:48,945 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:13:03,665 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:14:37,086 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-05-16 10:16:28,883 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 10:17:57,100 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 10:20:21,831 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 10:40:24,306 - distributed.worker.memory - WARNING - gc.collect() took 11.618s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
/mpc/homes/fische_r/lib/pytrainseg/training_functions.py:375: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if not X == 'no labels':
label_image_x_503_time_15_.tif
2023-05-16 11:00:29,991 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%)
2023-05-16 11:02:55,018 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%)
2023-05-16 11:04:08,520 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%)
2023-05-16 11:21:07,596 - distributed.worker.memory - WARNING - gc.collect() took 11.310s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-05-16 11:34:20,746 - distributed.worker.memory - WARNING - gc.collect() took 12.740s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
label_image_y_255_time_20_.tif
2023-05-16 11:58:42,997 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 12:00:56,045 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 12:03:19,904 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 12:05:08,338 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-05-16 12:40:23,700 - distributed.worker.memory - WARNING - gc.collect() took 15.942s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-05-16 12:54:44,026 - distributed.worker.memory - WARNING - gc.collect() took 16.627s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-05-16 13:06:40,104 - distributed.worker.memory - WARNING - gc.collect() took 18.170s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-05-16 13:23:04,136 - distributed.worker.memory - WARNING - gc.collect() took 19.639s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-05-16 13:39:07,655 - distributed.worker.memory - WARNING - gc.collect() took 25.502s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
label_image_y_250_time_33_.tif
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[41], line 1
----> 1 TS.train()

File ~/lib/pytrainseg/training_functions.py:374, in train_segmentation.train(self, clear_dict, redo)
    372     continue
    373 print(label_name)
--> 374 X, y = training_set_per_image(label_name, path, feat_data, self.lazy)
    375 if not X == 'no labels':
    376     self.training_dict[label_name] = X,y

File ~/lib/pytrainseg/training_functions.py:138, in training_set_per_image(label_name, trainingpath, feat_data, lazy)
    130 # if lazy:
    131 #     print('Need to actually calculate the features for each slice, seems inefficient')
    132 # #   not sure how efficient this is
   (...)
    135 #     feat_stack = feat_stack.compute()
    136 # else:
    137 if type(feat_stack) is not np.ndarray:
--> 138         feat_stack = feat_stack.compute()
    139 if type(feat_stack_t_idp) is not np.ndarray:
    140         feat_stack_t_idp = feat_stack_t_idp.compute()

File ~/miniconda3/lib/python3.10/site-packages/dask/base.py:314, in DaskMethodsMixin.compute(self, **kwargs)
    290 def compute(self, **kwargs):
    291     """Compute this dask collection
    292 
    293     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    312     dask.base.compute
    313     """
--> 314     (result,) = compute(self, traverse=False, **kwargs)
    315     return result

File ~/miniconda3/lib/python3.10/site-packages/dask/base.py:593, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    585     return args
    587 schedule = get_scheduler(
    588     scheduler=scheduler,
    589     collections=collections,
    590     get=get,
    591 )
--> 593 dsk = collections_to_dsk(collections, optimize_graph, **kwargs)
    594 keys, postcomputes = [], []
    595 for x in collections:

File ~/miniconda3/lib/python3.10/site-packages/dask/base.py:366, in collections_to_dsk(collections, optimize_graph, optimizations, **kwargs)
    364 for opt, val in groups.items():
    365     dsk, keys = _extract_graph_and_keys(val)
--> 366     dsk = opt(dsk, keys, **kwargs)
    368     for opt_inner in optimizations:
    369         dsk = opt_inner(dsk, keys, **kwargs)

File ~/miniconda3/lib/python3.10/site-packages/dask/array/optimization.py:57, in optimize(dsk, keys, fuse_keys, fast_functions, inline_functions_fast_functions, rename_fused_keys, **kwargs)
     54 if config.get("optimization.fuse.active") is False:
     55     return dsk
---> 57 dependencies = dsk.get_all_dependencies()
     58 dsk = ensure_dict(dsk)
     60 # Low level task optimizations

File ~/miniconda3/lib/python3.10/site-packages/dask/highlevelgraph.py:813, in HighLevelGraph.get_all_dependencies(self)
    811 if missing_keys:
    812     for layer in self.layers.values():
--> 813         for k in missing_keys & layer.keys():
    814             self.key_dependencies[k] = layer.get_dependencies(k, all_keys)
    815 return self.key_dependencies

File ~/miniconda3/lib/python3.10/_collections_abc.py:638, in Set.__and__(self, other)
    636 if not isinstance(other, Iterable):
    637     return NotImplemented
--> 638 return self._from_iterable(value for value in other if value in self)

File ~/miniconda3/lib/python3.10/_collections_abc.py:880, in KeysView._from_iterable(cls, it)
    878 @classmethod
    879 def _from_iterable(cls, it):
--> 880     return set(it)

File ~/miniconda3/lib/python3.10/_collections_abc.py:638, in <genexpr>(.0)
    636 if not isinstance(other, Iterable):
    637     return NotImplemented
--> 638 return self._from_iterable(value for value in other if value in self)

File ~/miniconda3/lib/python3.10/_collections_abc.py:883, in KeysView.__contains__(self, key)
    882 def __contains__(self, key):
--> 883     return key in self._mapping

File ~/miniconda3/lib/python3.10/site-packages/dask/highlevelgraph.py:540, in MaterializedLayer.__contains__(self, k)
    537     super().__init__(annotations=annotations)
    538     self.mapping = mapping
--> 540 def __contains__(self, k):
    541     return k in self.mapping
    543 def __getitem__(self, k):

KeyboardInterrupt: 

import training dict of other samples

(replace sample name and repeat for multiple samples), if necessary check features for overlap

In [39]:
# Import the training dict of another sample and merge it into the current
# training set. Replace `oldsample` / `oldgitsha` and repeat for multiple
# samples; if necessary, check the features for overlap first.
oldsample = '4'
oldgitsha = 'ec4415d'

# Sample '4' was trained before the location features existed, so it has a
# dedicated pickle on the SSD; all other samples follow the standard naming.
if oldsample == '4':
    dict_path = os.path.join(toppathSSD, '05_water_GDL_ML', '4', 'ec4415d_training_dict_without_loc_feat.p')
else:
    dict_path = os.path.join(training_path, oldsample, oldgitsha + '_training_dict.p')

# Context managers close the pickle files deterministically (the original
# bare `pickle.load(open(...))` leaked the handles).
with open(dict_path, 'rb') as f:
    training_dict_old = pickle.load(f)
with open(os.path.join(training_path, oldsample, oldgitsha + '_feature_names.p'), 'rb') as f:
    oldfeatures = pickle.load(f)

# pickle.dump(TS.training_dict, open(os.path.join(TS.training_path, pytrain_git_sha+'_training_dict.p'),'wb'))
# pickle.dump(TS.feature_names, open(os.path.join(TS.training_path, pytrain_git_sha+'_feature_names.p'),'wb'))

# Prefix the keys with the sample name so label sets from different samples
# cannot collide in TS.training_dict.
for key in training_dict_old.keys():
    TS.training_dict[oldsample + key] = training_dict_old[key]

suggest a new training coordinate

currently retraining with new feature stack not properly implemented. Workaround: choose from the existing training sets and train with them (additional labeling optional)

In [42]:
TS.suggest_training_set()
You could try  x = 55  and  z = 1058
However, please sort it like the original xyztimetime_0feature
In [70]:
c1 = 'y'
p1 = 240
c2 = 'time'
p2 = 14
In [ ]:
TS.load_training_set(c1, p1, c2, p2)
In [ ]:
im8 = TS.current_im8 #for canvas
In [73]:
feat_data = TS.feat_data
[c1, p1, c2, p2] = TS.current_coordinates
newslice = True

# Build the xarray selection once instead of enumerating every coordinate
# pair. The time-independent stack has a singleton 'time_0' axis, so when
# slicing along 'time' we select time_0 = 0 there instead of the time point.
sel = {c1: p1, c2: p2}
if c2 == 'time':
    sel_t_idp = {c1: p1, 'time_0': 0}
else:
    sel_t_idp = {c1: p1, c2: p2}

# NOTE(review): the original 'x'/'time' branch returned the DataArray itself
# while every other branch returned '.data' — presumably an oversight, so
# '.data' is now taken consistently for the time-dependent stack.
feat_stack = feat_data['feature_stack'].sel(**sel).data
feat_stack_t_idp = feat_data['feature_stack_time_independent'].sel(**sel_t_idp)

calculate the feature stack of the current slice for training

In [ ]:
# Time dependent features: materialize the lazy dask slice to numpy.
if type(feat_stack) is not np.ndarray:
    feat_stack = client.scatter(feat_stack).result().compute()
    # Restart the workers to release memory held after the computation.
    client.restart()

reboot cluster if workers do not return

In [ ]:
# Number of workers currently alive in the local cluster (shown for inspection).
len(client.cluster.workers)

# Reboot the cluster if the workers did not come back after the restart.
if len(client.cluster.workers) == 0:
    client = reboot_client(client, cluster)
    TS.client = client
    IF.client = client
In [ ]:
# Time independent features: materialize the lazy dask slice to numpy.
if type(feat_stack_t_idp) is not np.ndarray:
    feat_stack_t_idp = client.scatter(feat_stack_t_idp).result().compute()
    # Restart the workers to release memory held after the computation.
    client.restart()
    
In [ ]:
# Reboot the cluster if the workers did not come back after the restart.
if len(client.cluster.workers) == 0:
    client = reboot_client(client, cluster)
    TS.client = client
    IF.client = client
In [ ]:
# put features together
# Append the time-independent features to the time-dependent ones along the
# feature axis (axis 2 of the 2D training slice).
feat_stack = np.concatenate([feat_stack, feat_stack_t_idp], axis = 2)
In [ ]:
TS.current_feat_stack = feat_stack
# Mark the slice as ready only when the features were materialized to numpy.
TS.current_computed = type(TS.current_feat_stack) is np.ndarray

canvas for labeling

In [92]:
# Build the interactive labeling canvas: a 4-layer MultiCanvas with the grey
# image as background, the existing segmentation result as a translucent
# overlay, and a top layer for drawing new labels with the mouse.
alpha = 0.15
# zoom1 = (-500,-1)
# zoom2 = (600,1400)

# zoom1 = (0, -1)
# zoom2 = (0, -1)

# im8 = TS.current_im8
#trick: use gaussian_time_4_0 to label static phases ()
# im8 = display_feature(-2, TS)
# im8 = display_feature(-20, TS)
# print(IF.combined_feature_names[-20])
print('original shape: ',im8.shape)
im8_display = im8.copy() #[zoom1[0]:zoom1[1], zoom2[0]:zoom2[1]]
# print('diyplay shape : ',im8_display.shape,' at: ', (zoom1[0], zoom2[0]))

resultim = TS.current_result.copy()

resultim_display = resultim #[zoom1[0]:zoom1[1], zoom2[0]:zoom2[1]]


# Canvas layers: 0 = grey background, 1 = truth, 2 = result overlay,
# 3 = drawing layer (topmost, receives the mouse events).
width = im8_display.shape[1]
height = im8_display.shape[0]
Mcanvas = MultiCanvas(4, width=width, height=height)
background = Mcanvas[0]
resultdisplay = Mcanvas[2]
truthdisplay = Mcanvas[1]
canvas = Mcanvas[3]
canvas.sync_image_data = True
drawing = False
position = None
shape = []
# Replicate the grey image into RGB for put_image_data.
image_data = np.stack((im8_display, im8_display, im8_display), axis=2)
background.put_image_data(image_data, 0, 0)
slidealpha = IntSlider(description="Result overlay", value=0.15)
resultdisplay.global_alpha = alpha #slidealpha.value
if np.any(resultim>0):
    # Encode result classes as colors: 0 -> red, 1 -> green, 2 -> blue,
    # class 3 additionally gets red+green (yellow).
    result_data = np.stack(((resultim_display==0), (resultim_display==1),(resultim_display==2)), axis=2)*255
    mask3 = resultim_display==3
    result_data[mask3,0] = 255
    result_data[mask3,1] = 255
else:
    result_data = np.stack((0*resultim, 0*resultim, 0*resultim), axis=2)
resultdisplay.put_image_data(result_data, 0, 0)
# Mouse handlers (defined elsewhere in the notebook) implement the drawing.
canvas.on_mouse_down(on_mouse_down)
canvas.on_mouse_move(on_mouse_move)
canvas.on_mouse_up(on_mouse_up)
picker = ColorPicker(description="Color:", value="#ff0000") #red
# picker = ColorPicker(description="Color:", value="#0000ff") #blue
# picker = ColorPicker(description="Color:", value="#00ff00") #green

# Link the color picker to the pen, and the slider to the overlay opacity.
link((picker, "value"), (canvas, "stroke_style"))
link((picker, "value"), (canvas, "fill_style"))
link((slidealpha, "value"), (resultdisplay, "global_alpha"))

HBox((Mcanvas,picker))
# HBox((Mcanvas,)) #picker 
original shape:  (750, 1916)
diyplay shape :  (749, 1915)  at:  (0, 0)
Out[92]:
HBox(children=(MultiCanvas(height=749, width=1915), ColorPicker(value='#ff0000', description='Color:')))
In [81]:
# crude method to adjust brightness and contrast
# Inspect the grey-value histogram; if needed, rescale im8 with the
# commented call below before drawing labels on the canvas.
tfs.plot_im_histogram(im8)
# im8 = TS.current_im8
# im8 = tfs.adjust_image_contrast(im8,20,200)
No description has been provided for this image

inspect labels and training progress

In [93]:
# Side-by-side inspection of the current training slice: segmentation
# result, grey image, difference image, first time step, labels, and (if
# available) one example feature.
panels = [
    (TS.current_result, {'cmap': 'gray'}),
    (TS.current_im8, {'cmap': 'gray'}),
    # TS.current_diff_im = TS.current_im-TS.current_first_im
    # TS.current_diff_im = TS.current_diff_im/TS.current_diff_im.max()*255
    (-TS.current_diff_im, {}),  # ,vmin=6e4
    (TS.current_first_im, {'cmap': 'gray'}),
    (TS.current_truth, {}),
]
if TS.current_computed:
    panels.append((TS.current_feat_stack[:, :, -10], {}))
else:
    panels.append((TS.current_result, {'cmap': 'gray'}))

fig, axes = plt.subplots(1, 6, figsize=(20, 10))
for ax, (img, kw) in zip(axes, panels):
    ax.imshow(img, **kw)
    ax.set_xticks([])
    ax.set_yticks([])
No description has been provided for this image

update training set if labels are ok

In [84]:
# Read the RGBA drawing from the canvas and turn the colored strokes into
# class labels on top of the existing truth image.
label_set = canvas.get_image_data()

test = TS.current_truth.copy()

red = label_set[:, :, 0] > 0
green = label_set[:, :, 1] > 0
blue = label_set[:, :, 2] > 0

# Original used np.bitwise_and(red, np.bitwise_xor(red, green)), which
# simplifies to "red and not green" (pure red strokes).
test[red & ~green] = 1
test[green] = 2
test[blue] = 4  # order of 4 & 3 flipped for legacy reasons (existing training labels)
test[red & green] = 3  # yellow strokes (red + green) override green

TS.current_truth = test.copy()
# imageio.imsave is a deprecated alias for imwrite.
imageio.imwrite(TS.current_truthpath, TS.current_truth)

train!

In [87]:
# TODO: see, if training gets slow for many label sets, currently stored in training_dict and read as loop. or if it is just the larger amount of data
# Compute the features of the current slice (if not yet materialized) and
# fit the classifier on all stored label sets.
TS.train_slice()
now actually calculating the features
2023-03-29 10:43:42,902 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-03-29 10:46:50,886 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-03-29 10:47:32,421 - distributed.worker.memory - WARNING - gc.collect() took 1.027s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-03-29 10:49:51,644 - distributed.worker.memory - WARNING - gc.collect() took 1.240s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-03-29 10:49:52,590 - distributed.worker.memory - WARNING - gc.collect() took 1.523s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-03-29 10:50:09,993 - distributed.worker.memory - WARNING - gc.collect() took 1.379s. This is usually a sign that some tasks handle too many Python objects at the same time. Rechunking the work into smaller tasks might help.
2023-03-29 10:52:55,347 - tornado.application - ERROR - Uncaught exception GET /status/ws (::1)
HTTPServerRequest(protocol='http', host='127.0.0.1:35000', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='::1')
Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/tornado/websocket.py", line 942, in _accept_connection
    open_result = handler.open(*handler.open_args, **handler.open_kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/tornado/web.py", line 3208, in wrapper
    return method(self, *args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/bokeh/server/views/ws.py", line 149, in open
    raise ProtocolError("Token is expired.")
bokeh.protocol.exceptions.ProtocolError: Token is expired.
ERROR:tornado.application:Uncaught exception GET /status/ws (::1)
HTTPServerRequest(protocol='http', host='127.0.0.1:35000', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='::1')
Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/tornado/websocket.py", line 942, in _accept_connection
    open_result = handler.open(*handler.open_args, **handler.open_kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/tornado/web.py", line 3208, in wrapper
    return method(self, *args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/bokeh/server/views/ws.py", line 149, in open
    raise ProtocolError("Token is expired.")
bokeh.protocol.exceptions.ProtocolError: Token is expired.
feat_stack is not a numpy array! check why
training and classifying
In [421]:
# # load exisiting classifier and segment one slice
# clf = pickle.load(open(os.path.join(training_path, 'classifier.p'), 'rb'))
# feat = TS.current_feat_stack.compute()
# shp = feat[...,0].shape
# num_feat = feat.shape[-1]
# feat = feat.reshape(-1,num_feat)
# seg = clf.predict(feat)
# seg = seg.reshape(shp).astype(np.uint8)
# plt.figure(figsize=(16,9))
# plt.imshow(seg, cmap='gray_r')
In [422]:
# plt.figure(figsize=(16,9))
# plt.imshow(im8, cmap='gray')

go back until happy

check on training progress by plausible feature importance

In [94]:
# Plot the classifier's feature importances; implausible distributions
# (e.g. all weight on a single feature) hint at labeling problems.
plt.figure(figsize=(16,9))
plt.stem(TS.combined_feature_names, TS.clf.feature_importances_,'x')
plt.xticks(rotation=90)
plt.ylabel('importance') 
# plt.xticks(rotation = 60)
Out[94]:
Text(0, 0.5, 'importance')
No description has been provided for this image

when done, maybe save the classifier and optionally the training dict (avoids recalculating the training sets, but might be large)

In [43]:
# TS.pickle_classifier()
# Persist the training dict and feature names; context managers close the
# files deterministically (the original bare open() leaked the handles).
with open(os.path.join(TS.training_path, pytrain_git_sha + '_training_dict.p'), 'wb') as f:
    pickle.dump(TS.training_dict, f)
with open(os.path.join(TS.training_path, pytrain_git_sha + '_feature_names.p'), 'wb') as f:
    pickle.dump(TS.combined_feature_names, f)
In [46]:
TS.training_dict_full
Out[46]:
{'label_image_y_255_time_40_.tif': (array([[-1.46428571e+02, -4.71285714e+02,  1.71676120e-01, ...,
           1.11674225e+04,  1.06050000e+04,  1.70411936e-01],
         [ 1.54714286e+02, -1.69714286e+02,  1.72399418e-01, ...,
           1.13587042e+04,  1.08410000e+04,  1.71066865e-01],
         [-1.65000000e+02, -3.60142857e+02,  1.71660520e-01, ...,
           1.10823803e+04,  1.03730000e+04,  1.70895247e-01],
         ...,
         [ 1.56428571e+02,  2.81285714e+02,  1.78094529e-01, ...,
           1.19940141e+04,  1.14850000e+04,  1.78078066e-01],
         [ 1.63285714e+02,  1.47000000e+02,  1.77728446e-01, ...,
           1.19303803e+04,  1.13000000e+04,  1.77728446e-01],
         [-2.08571429e+02, -3.50142857e+02,  1.76900536e-01, ...,
           1.15639014e+04,  1.10180000e+04,  1.76805548e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_z_307_time_25_.tif': (array([[-2.46142857e+02, -3.74142857e+02,  1.76275724e-01, ...,
           1.18232958e+04,  1.10610000e+04,  1.74925798e-01],
         [-3.42000000e+02, -4.85857143e+02,  1.76316241e-01, ...,
           1.18405352e+04,  1.11300000e+04,  1.75319188e-01],
         [-1.90428571e+02, -3.74857143e+02,  1.77512776e-01, ...,
           1.21114930e+04,  1.14260000e+04,  1.76005940e-01],
         ...,
         [-1.81714286e+02,  4.80285714e+02,  1.76016286e-01, ...,
           1.10960563e+04,  1.01840000e+04,  1.74601599e-01],
         [ 9.84285714e+01,  6.54285714e+02,  1.75308977e-01, ...,
           1.09472676e+04,  1.03020000e+04,  1.73986060e-01],
         [ 3.74714286e+02,  1.56428571e+02,  1.76564722e-01, ...,
           1.16763662e+04,  1.06990000e+04,  1.75170552e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_x_157_time_5_.tif': (array([[ 6.12714286e+02,  4.12000000e+02,  1.81691079e-01, ...,
           1.18927465e+04,  1.12010000e+04,  1.80019835e-01],
         [ 2.26857143e+02, -1.45142857e+02,  1.81563116e-01, ...,
           1.18323099e+04,  1.09740000e+04,  1.79961364e-01],
         [ 4.12428571e+02,  2.41142857e+02,  1.81434953e-01, ...,
           1.18325352e+04,  1.08200000e+04,  1.79912975e-01],
         ...,
         [ 4.61285714e+02,  1.17900000e+03,  1.82986851e-01, ...,
           1.16421831e+04,  1.06770000e+04,  1.76633232e-01],
         [ 2.95285714e+02,  1.14028571e+03,  1.82246264e-01, ...,
           1.15427042e+04,  1.07200000e+04,  1.76239893e-01],
         [ 2.36571429e+02,  1.05700000e+03,  1.81376676e-01, ...,
           1.14711549e+04,  1.06250000e+04,  1.76194815e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_y_71_time_27_.tif': (array([[-1.35857143e+02,  1.11428571e+01,  1.93519196e-01, ...,
           1.29737324e+04,  1.19270000e+04,  1.91659363e-01],
         [ 1.04714286e+02,  1.25000000e+02,  1.93022896e-01, ...,
           1.29130563e+04,  1.20690000e+04,  1.91258373e-01],
         [ 2.80000000e+01,  1.83000000e+02,  1.92580109e-01, ...,
           1.28927042e+04,  1.18410000e+04,  1.90972621e-01],
         ...,
         [ 1.77571429e+02,  9.50000000e+01,  1.82714666e-01, ...,
           1.17465211e+04,  1.13000000e+04,  1.81494558e-01],
         [ 1.76714286e+02,  2.65714286e+02,  1.83214647e-01, ...,
           1.17702676e+04,  1.11810000e+04,  1.81948427e-01],
         [ 9.18571429e+01,  2.36428571e+02,  1.83764860e-01, ...,
           1.17896901e+04,  1.12550000e+04,  1.82611815e-01]]),
  array([1., 1., 1., ..., 2., 2., 2.])),
 'label_image_x_270_time_50_.tif': (array([[ 7.62285714e+02,  3.66285714e+02,  1.77352465e-01, ...,
           1.17248873e+04,  1.08400000e+04,  1.76199144e-01],
         [ 5.79571429e+02,  1.56857143e+02,  1.77569550e-01, ...,
           1.21831408e+04,  1.15700000e+04,  1.76699324e-01],
         [ 1.67714286e+02, -1.69857143e+02,  1.77088113e-01, ...,
           1.21293662e+04,  1.14650000e+04,  1.76558669e-01],
         ...,
         [ 1.76571429e+02,  3.25571429e+02,  1.78726329e-01, ...,
           1.17209718e+04,  1.11370000e+04,  1.78671522e-01],
         [-1.60285714e+02,  5.28571429e+01,  1.78158173e-01, ...,
           1.14837465e+04,  1.09890000e+04,  1.78069821e-01],
         [-4.00000000e+02, -9.31428571e+01,  1.77880470e-01, ...,
           1.14104507e+04,  1.08650000e+04,  1.77633669e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_x_456_time_0_.tif': (array([[2.07142857e+02, 1.14642857e+03, 1.90178932e-01, ...,
          1.22672817e+04, 1.08070000e+04, 1.77169381e-01],
         [2.49714286e+02, 1.03714286e+03, 1.91092602e-01, ...,
          1.24148451e+04, 1.11670000e+04, 1.79968294e-01],
         [2.52714286e+02, 8.14428571e+02, 1.92194164e-01, ...,
          1.25856901e+04, 1.17740000e+04, 1.83731002e-01],
         ...,
         [2.74285714e+02, 1.28857143e+02, 1.75245020e-01, ...,
          1.16030423e+04, 1.10330000e+04, 1.74652647e-01],
         [1.13857143e+02, 2.27142857e+02, 1.75125835e-01, ...,
          1.12481831e+04, 1.06070000e+04, 1.74509806e-01],
         [3.50428571e+02, 4.15428571e+02, 1.75175942e-01, ...,
          1.12752817e+04, 1.07240000e+04, 1.74449912e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_z_320_time_10_.tif': (array([[8.92857143e+01, 1.87571429e+02, 1.82469480e-01, ...,
          1.22107042e+04, 1.11800000e+04, 1.81068155e-01],
         [6.86000000e+02, 4.32857143e+02, 1.80689966e-01, ...,
          1.20154648e+04, 1.09530000e+04, 1.79250545e-01],
         [3.05285714e+02, 1.21428571e+01, 1.79607390e-01, ...,
          1.19377746e+04, 1.04670000e+04, 1.77957822e-01],
         ...,
         [1.13571429e+02, 2.35428571e+02, 1.76514598e-01, ...,
          1.14960563e+04, 1.07940000e+04, 1.74584281e-01],
         [2.81000000e+02, 4.33000000e+02, 1.76417716e-01, ...,
          1.15155493e+04, 1.07560000e+04, 1.74400619e-01],
         [1.56428571e+02, 1.06857143e+02, 1.76227649e-01, ...,
          1.15197183e+04, 1.08520000e+04, 1.74042724e-01]]),
  array([0., 0., 0., ..., 1., 1., 1.])),
 'label_image_z_564_time_0_.tif': (array([[ 3.13285714e+02,  2.64285714e+02,  1.96006687e-01, ...,
           1.27615915e+04,  1.21920000e+04,  1.94477154e-01],
         [-8.00000000e+01, -1.59285714e+02,  1.95622355e-01, ...,
           1.27900704e+04,  1.22220000e+04,  1.94649750e-01],
         [-9.24285714e+01, -1.13000000e+02,  1.95298843e-01, ...,
           1.27974789e+04,  1.22600000e+04,  1.94626234e-01],
         ...,
         [-4.71428571e+00,  2.92857143e+02,  1.79363554e-01, ...,
           1.12649577e+04,  1.05480000e+04,  1.73660473e-01],
         [ 4.69857143e+02,  7.61000000e+02,  1.79049298e-01, ...,
           1.11457746e+04,  1.03290000e+04,  1.72011410e-01],
         [ 4.55142857e+02,  7.02285714e+02,  1.78584209e-01, ...,
           1.10580845e+04,  1.04030000e+04,  1.70629070e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_z_320_time_30_.tif': (array([[ 2.14857143e+02,  1.52285714e+02,  2.01065715e-01, ...,
           1.33168028e+04,  1.19130000e+04,  1.96774246e-01],
         [-3.23714286e+02,  1.71428571e+00,  2.01161255e-01, ...,
           1.33518310e+04,  1.18630000e+04,  1.96969844e-01],
         [-3.62714286e+02, -9.71428571e+00,  2.01125502e-01, ...,
           1.33800563e+04,  1.20740000e+04,  1.96864119e-01],
         ...,
         [-1.48485714e+03, -6.54714286e+02,  1.77340966e-01, ...,
           1.15403099e+04,  1.06520000e+04,  1.76559372e-01],
         [-1.58242857e+03, -6.48857143e+02,  1.79606184e-01, ...,
           1.18334648e+04,  1.09120000e+04,  1.78930439e-01],
         [-7.85000000e+02,  1.34000000e+02,  1.82066069e-01, ...,
           1.23317606e+04,  1.13080000e+04,  1.81227066e-01]]),
  array([0., 0., 0., ..., 1., 1., 1.])),
 'label_image_x_264_time_29_.tif': (array([[-8.55714286e+01,  4.90000000e+01,  1.95446785e-01, ...,
           1.27986338e+04,  1.23040000e+04,  1.93879462e-01],
         [ 1.22857143e+02,  2.16428571e+02,  1.95580021e-01, ...,
           1.27709296e+04,  1.20220000e+04,  1.93749050e-01],
         [ 1.40000000e+01,  2.14285714e+01,  1.95790805e-01, ...,
           1.28141268e+04,  1.21830000e+04,  1.93688263e-01],
         ...,
         [-4.59857143e+02, -3.76000000e+02,  1.76360268e-01, ...,
           1.13471831e+04,  1.02530000e+04,  1.70212563e-01],
         [-2.67285714e+02, -5.01000000e+02,  1.76233976e-01, ...,
           1.13495915e+04,  1.02230000e+04,  1.70287334e-01],
         [ 9.31428571e+01, -1.57714286e+02,  1.75952877e-01, ...,
           1.13085915e+04,  1.02450000e+04,  1.70298715e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.])),
 'label_image_y_280_time_57_.tif': (array([[-8.58571429e+01,  7.68571429e+01,  1.82389721e-01, ...,
           1.20554085e+04,  1.14720000e+04,  1.81354376e-01],
         [-2.18000000e+02,  1.72000000e+02,  1.82640049e-01, ...,
           1.22886620e+04,  1.16720000e+04,  1.81573052e-01],
         [ 3.24000000e+02,  3.61428571e+02,  1.82303870e-01, ...,
           1.19934366e+04,  1.13380000e+04,  1.81405977e-01],
         ...,
         [ 9.55714286e+01,  2.29714286e+02,  1.71066023e-01, ...,
           1.12507606e+04,  9.87400000e+03,  1.69633661e-01],
         [ 1.79000000e+02,  3.07428571e+02,  1.71033091e-01, ...,
           1.12309014e+04,  1.01770000e+04,  1.69707987e-01],
         [ 2.13714286e+02,  1.01428571e+01,  1.70781424e-01, ...,
           1.11918310e+04,  9.93100000e+03,  1.69562468e-01]]),
  array([0., 0., 0., ..., 1., 1., 1.])),
 'label_image_y_250_time_23_.tif': (array([[ 6.29571429e+02,  2.80571429e+02,  1.83566668e-01, ...,
           1.19565634e+04,  1.11340000e+04,  1.81957534e-01],
         [ 2.94000000e+02,  9.17142857e+01,  1.83483545e-01, ...,
           1.19102113e+04,  1.11850000e+04,  1.81933184e-01],
         [-2.17428571e+02, -4.80142857e+02,  1.83503721e-01, ...,
           1.19503380e+04,  1.11650000e+04,  1.81941278e-01],
         ...,
         [-5.99142857e+02, -3.23000000e+02,  1.75209452e-01, ...,
           1.13515493e+04,  1.06300000e+04,  1.73828147e-01],
         [-2.53428571e+02,  3.61428571e+01,  1.76540683e-01, ...,
           1.16698169e+04,  1.10110000e+04,  1.76169142e-01],
         [ 3.13714286e+02,  4.45285714e+02,  1.77460691e-01, ...,
           1.18957606e+04,  1.12630000e+04,  1.76957676e-01]]),
  array([0., 0., 0., ..., 2., 2., 2.]))}
In [103]:
TS.training_path
Out[103]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/1'
In [50]:
pytrain_git_sha
Out[50]:
'ec4415d'

Segmentation of full data set

segmentation has not yet been checked for functionality after the dask update! probably needs major restructuring but should work eventually

In [37]:
from segmentation import segmentation
In [38]:
# Set up the segmentation object for the full 5D volume; it uses the
# classifier trained above (read from disk).
classifier_path=os.path.join(training_path, 'classifier.p')
SM = segmentation(training_path = training_path, classifier_path=classifier_path)
In [39]:
# SM.import_lazy_feature_data(IF.result)
# SM.import_classifier(TS.clf)
# Load the pickled classifier; the context manager closes the file handle
# (the original bare open() leaked it).
with open(os.path.join(training_path, 'classifier.p'), 'rb') as f:
    SM.clf = pickle.load(f)
In [40]:
clf = SM.clf
# mpc2053 has fewer cores, so use fewer parallel prediction jobs there.
clf.n_jobs = 20 if host == 'mpc2053.psi.ch' else 64
In [41]:
#TODO create result as a stream for every feature set of chunks, i.e stack of 67 feature chunks
# clf = TS.clf
# clf.n_jobs = 32
In [42]:
# loc_feats = [-4, -5, -6]
# ids = np.ones(72, dtype=bool)
# for f in loc_feats:
#     ids[f] = False
In [43]:
# ids

merge time-independent features

In [44]:
# Replicate the time-independent feature stack along a new time axis so it
# can be concatenated with the time-dependent features below.
# NOTE(review): `da` is not defined anywhere in this notebook view —
# presumably the raw 4D data array whose last axis is time; confirm before
# running.
test = dask.array.stack([TS.feat_data['feature_stack_time_independent'][:,:,:,0,:]]*da.shape[-1], axis=-2)
2023-04-28 08:57:28,625 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-04-28 08:57:33,885 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
In [45]:
test
Out[45]:
Array Chunk
Bytes 543.37 GiB 256.00 kiB
Shape (750, 130, 1700, 88, 5) (32, 32, 32, 1, 1)
Dask graph 4049760 chunks in 3 graph layers
Data type float64 numpy.ndarray
130 750 5 88 1700
In [46]:
feat = dask.array.concatenate([TS.feat_data['feature_stack'], test], axis=-1)
2023-04-28 08:57:55,416 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 08:58:41,121 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 08:59:32,401 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:00:31,800 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:01:44,643 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:03:17,745 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
In [47]:
# feat = feat[...,ids]
In [48]:
feat
Out[48]:
Array Chunk
Bytes 7.32 TiB 256.00 kiB
Shape (750, 130, 1700, 88, 69) (32, 32, 32, 1, 1)
Dask graph 59019840 chunks in 450 graph layers
Data type float64 numpy.ndarray
130 750 69 88 1700

check alignment of chunks and select a suitable "super-chunk" shape to split the calculation into parts, example below

In [49]:
750*2.3
Out[49]:
1724.9999999999998
In [50]:
350/50
Out[50]:
7.0
In [51]:
# Pick a "super-chunk" (spatial sub-block over all features and times) to
# test that its dask slice has a sensible shape before the main loop.
i = 12
j = i
# j = 3
dim1 = 64  # better use a multiple of the chunk size!?
dim2 = int(2.3*dim1)
feat[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:,:] #select all features and all time steps, but you are free in space; as large as possible and as small as necessary. limit is the available RAM to collect the dask result
Out[51]:
Array Chunk
Bytes 0 B 0 B
Shape (0, 130, 0, 88, 69) (0, 32, 0, 1, 1)
Dask graph 36432 chunks in 451 graph layers
Data type float64 numpy.ndarray
In [52]:
# segs = np.zeros(feat.shape[:4], dtype=np.uint8)
# Resume from the previously saved partial segmentation; the context
# manager closes the file handle (the original bare open() leaked it).
with open(os.path.join(training_path, 'segs.p'), 'rb') as f:
    segs = pickle.load(f)
2023-04-28 09:06:48,538 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:07:43,656 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-04-28 09:07:45,739 - distributed.utils_perf - WARNING - full garbage collections took 10% CPU time recently (threshold: 10%)
2023-04-28 09:07:48,023 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:07:50,311 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:07:52,682 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:07:55,129 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:07:57,693 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:08:00,064 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
2023-04-28 09:08:02,809 - distributed.utils_perf - WARNING - full garbage collections took 11% CPU time recently (threshold: 10%)
In [53]:
# import gc
# Force garbage collection on all workers and in the local process to
# release unmanaged memory between super-chunks.
client.run(gc.collect)
gc.collect()
2023-04-28 09:08:05,439 - distributed.utils_perf - WARNING - full garbage collections took 12% CPU time recently (threshold: 10%)
2023-04-28 09:08:25,753 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
Out[53]:
101
In [54]:
import ctypes


def trim_memory() -> int:
    """Ask glibc to return freed heap pages to the OS (Linux only).

    Returns malloc_trim's result: 1 if memory was released, 0 otherwise.
    """
    return ctypes.CDLL("libc.so.6").malloc_trim(0)
In [55]:
limit = 12  # number of tiles along each of the two sliced spatial axes (tile sizes dim1/dim2 defined earlier — TODO confirm)
# TODO: Workaround: write intermediates to disk and restart dask client/scheduler/workers to get rid of unmanaged memory
# possible solution: do data handling on mpc2053 and calculations on mpc2959 -> leaves some RAM --> gc.collect() not necessary anymore
# apparently starts to become critical after 12 iterations
# from i=7, j=10 already done (in a previous session; hence the resume logic on `start` below)
# TODO write i and j to file to track progress even when closing jupyter
# Tile-wise segmentation of the full feature stack: for each (i, j) spatial
# tile, materialize the dask features, predict labels with the trained
# classifier `clf` (defined earlier in the notebook), and write the result
# into the preallocated/reloaded `segs` volume.
for i in range(1,limit):
    # checkpoint progress so a crash only loses the current row of tiles
    pickle.dump(segs, open(os.path.join(training_path,'segs_temp.p'), 'wb'))
    
    print(str(i+1)+'/'+str(limit))
    # plt.figure()
    # plt.imshow(segs[:,100,:, 50])
    # plt.savefig(os.path.join(training_path, sample+'_'+str(i)+'_progress.png'))
    # client.run(gc.collect)
    # client.run(trim_memory)
    start = 0
    if i == 1:
        # resume: tiles j < 8 of row i == 1 were already segmented in an earlier run
        start = 8
    for j in range(start,limit):
        gc.collect() 
        client.run(gc.collect)   # collect garbage on every dask worker
        client.run(trim_memory)  # ask the workers' glibc to return freed pages to the OS
        print(j)
        #with joblib.parallel_backend('dask'):
        part = feat[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:,:] #.persist() #compute() may blow up the memory ?! https://stackoverflow.com/questions/73770527/dask-compute-uses-twice-the-expected-memory
        if 0 in part.shape:
            # slicing past the array edge yields a zero-sized tile; nothing to do
            print('hit the edge (one dimension 0), ignore')
            continue
        part = part.compute()  # materialize this tile's features as a numpy array
        # try to release old unmanaged memory
        client.run(gc.collect)
        client.run(trim_memory)

        shp = part.shape      # remember the 5D tile shape to undo the flattening below
        num_feat = part.shape[-1]  
        # flatten voxels/time steps into rows -> (n_samples, n_features) for the classifier
        part = part.reshape(-1,num_feat)

        # split into two halves so predict()'s peak memory stays lower
        psplit = int(part.shape[0]/2)

        print('create part 1')
        part1 = part[:psplit,:]
        print('create part 2')
        part2 = part[psplit:,:]
        print('segmenting 1')

        # with joblib.parallel_backend('dask'):
        seg1 = clf.predict(part1).astype(np.uint8)
        del part1
        print('segmenting 2')
        # with joblib.parallel_backend('dask'):
        seg2 = clf.predict(part2).astype(np.uint8)
        print('wrap results')
        del part2
        del part
        # gc.collect()

        seg = np.concatenate([seg1,seg2])
        print(seg.shape)
        del seg1
        del seg2

        # restore the tile's 4D (space + time) shape, dropping the feature axis
        seg = seg.reshape(shp[:4])  #this step needs a lot of RAM ?! apparently not
    
    # not sure if these switch cases are necessary:
    # edge tiles (last row/column) use open-ended slices so any remainder
    # beyond limit*dim1 / limit*dim2 is also covered
        if i < limit-1 and j < limit-1:
            segs[i*dim1:(i+1)*dim1,:,j*dim2:(j+1)*dim2,:] = seg
        elif not i < limit-1 and j < limit-1:
            segs[i*dim1:,:,j*dim2:(j+1)*dim2,:] =  seg
        elif not j < limit-1 and i < limit-1:
            segs[i*dim1:(i+1)*dim1,:,j*dim2:,:] =  seg
        else:
            segs[i*dim1:,:,j*dim2:,:] = seg
            
        del seg
2023-04-28 09:08:27,911 - distributed.utils_perf - WARNING - full garbage collections took 13% CPU time recently (threshold: 10%)
2023-04-28 09:08:32,501 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 09:08:36,089 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 09:08:39,271 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 09:08:43,344 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 09:08:47,329 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 09:08:51,357 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 09:08:56,011 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 09:09:01,183 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 09:09:06,433 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
1/12
2023-04-28 09:09:25,644 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:09:26,978 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
0
2023-04-28 09:11:01,156 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:12:35,274 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:12:41,381 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:13:15,229 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:13:54,218 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:01,552 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:09,541 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:17,976 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:26,864 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:35,784 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:45,427 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:14:55,782 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:15:07,106 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:15:18,367 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:15:30,881 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:15:43,739 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:15:57,649 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:16:13,692 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:16:30,197 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:16:45,526 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:17:02,614 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:17:21,621 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:17:43,388 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:18:04,014 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:18:26,656 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:18:54,293 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:19:22,554 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:19:50,806 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:20:17,625 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:20:44,917 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:21:15,715 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:21:50,743 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:22:29,074 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:23:10,350 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:23:49,683 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:24:34,795 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:25:14,922 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:25:59,410 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:26:56,132 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:30:19,717 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 09:33:50,338 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:33:54,598 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
1
2023-04-28 09:37:46,191 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 09:38:27,164 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:38:54,606 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:39:25,908 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:39:58,196 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:40:33,729 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:41:10,943 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:41:50,088 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:42:30,832 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:43:14,191 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:43:58,595 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:44:43,626 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:46:00,337 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 09:48:51,714 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 09:52:24,671 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:52:29,122 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2
2023-04-28 09:56:18,982 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 09:57:00,346 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:57:28,002 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:58:00,087 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:58:33,375 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 09:59:09,327 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 09:59:45,641 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:00:25,569 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:01:05,448 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:01:49,007 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:02:33,796 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:03:20,597 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:05:16,109 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:07:28,457 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 10:11:00,336 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:11:04,810 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
3
2023-04-28 10:14:56,290 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:15:38,548 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:16:05,579 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:16:36,660 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:17:09,639 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:17:45,001 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:18:22,414 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:19:03,250 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:19:43,380 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:20:26,470 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:21:12,471 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:22:00,256 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:23:44,218 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:26:08,975 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 10:29:48,016 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:29:52,217 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
4
2023-04-28 10:33:41,743 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:34:22,697 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 10:34:51,289 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 10:35:19,666 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 10:35:53,907 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:36:29,200 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:37:07,892 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:37:46,968 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:38:26,660 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:39:10,050 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:39:54,893 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:40:42,002 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:42:35,835 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:44:41,386 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 10:48:11,129 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:48:15,871 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
5
2023-04-28 10:52:05,326 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:52:47,845 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:53:15,818 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:53:50,242 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:54:24,344 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:54:59,402 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:55:37,198 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:56:17,524 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 10:56:59,300 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:57:42,657 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:58:27,726 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 10:59:14,335 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:00:35,482 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:03:26,363 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 11:07:03,483 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:07:07,776 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
6
2023-04-28 11:10:58,040 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:11:39,632 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:12:06,874 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:12:39,453 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:13:13,249 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:13:49,158 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:14:27,064 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:15:07,297 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:15:48,265 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:16:31,585 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:17:15,741 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:18:03,977 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:19:36,288 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:22:16,177 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 11:25:48,682 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 11:25:53,441 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
7
2023-04-28 11:29:44,829 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 11:30:26,363 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:30:53,563 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:31:26,302 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:31:59,741 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 11:32:35,852 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:33:15,321 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:33:55,385 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:34:37,626 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:35:21,658 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:36:07,559 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:36:54,741 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:38:49,685 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:41:03,867 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 11:44:40,915 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 11:44:45,410 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
8
2023-04-28 11:48:37,955 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 11:49:19,274 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:49:46,124 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:50:18,363 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 11:50:51,032 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:51:26,612 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:52:04,024 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:52:43,596 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:53:25,026 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:54:08,284 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:54:53,228 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:55:41,312 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:56:49,790 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 11:59:53,855 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 12:03:31,389 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:03:36,150 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
9
2023-04-28 12:07:28,107 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:08:10,790 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 12:08:39,711 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 12:09:09,877 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:09:45,295 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:10:25,262 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:11:02,139 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:11:40,774 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:12:21,747 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:13:04,126 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:13:50,461 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:14:38,215 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:16:32,854 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:18:36,025 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 12:22:03,472 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:22:08,496 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
10
2023-04-28 12:26:01,489 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:26:46,874 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:27:15,977 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:27:48,595 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:28:21,703 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:28:58,852 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:29:36,198 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:30:15,999 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:30:58,120 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:31:41,151 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 12:32:27,322 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:33:16,405 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:34:23,925 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:37:30,309 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 12:41:10,472 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:41:14,911 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
11
2023-04-28 12:45:03,101 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 12:45:44,089 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 12:46:14,342 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 12:46:45,671 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:47:19,015 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:47:52,533 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:48:30,022 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:49:09,080 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:49:52,034 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 12:50:35,136 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:07:20,522 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:08:07,391 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:10:50,284 - distributed.utils_perf - WARNING - full garbage collections took 15% CPU time recently (threshold: 10%)
2023-04-28 13:11:33,187 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:12:21,331 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 13:15:58,072 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:16:02,683 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
1
2023-04-28 13:19:59,444 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:20:41,630 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 13:21:10,911 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 13:21:39,342 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 13:22:14,396 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 13:22:51,258 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:23:30,507 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:24:10,381 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:24:52,140 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:25:37,076 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:26:24,691 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:27:12,810 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:28:02,144 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:31:36,336 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 13:35:13,229 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:35:17,909 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2
2023-04-28 13:39:14,703 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:39:57,621 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:40:26,990 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 13:40:56,744 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:41:34,049 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:42:11,988 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:42:50,804 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:43:31,230 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:44:14,245 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:44:57,120 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:45:42,661 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:46:31,464 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:47:29,831 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:50:53,316 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 13:54:35,278 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:54:40,081 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
3
2023-04-28 13:58:34,454 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 13:59:17,889 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 13:59:47,860 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:00:18,009 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:00:54,121 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:01:31,134 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:02:09,636 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:02:49,887 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:03:30,509 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:04:14,728 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:05:04,603 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:05:53,088 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:06:43,267 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:10:13,672 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 14:13:35,523 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:13:40,258 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
4
2023-04-28 14:17:35,143 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:18:17,955 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:18:46,786 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:19:15,680 - distributed.utils_perf - WARNING - full garbage collections took 19% CPU time recently (threshold: 10%)
2023-04-28 14:19:51,514 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:20:25,755 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:21:03,917 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:21:43,329 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:22:24,718 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:23:08,127 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:23:54,127 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:24:42,352 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:25:40,825 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:29:02,860 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 14:32:29,962 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:32:34,324 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
5
2023-04-28 14:36:30,886 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:37:13,969 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:37:41,245 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:38:13,130 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:38:46,967 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:39:23,051 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:40:01,342 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:40:40,535 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 14:41:20,840 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:42:04,244 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:42:50,823 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:43:37,942 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:44:27,596 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:47:53,847 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 14:51:25,857 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:51:30,790 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
6
2023-04-28 14:55:29,831 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 14:56:13,924 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 14:56:43,053 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:57:12,522 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:57:46,826 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:58:22,673 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:59:01,253 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 14:59:40,928 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:00:20,946 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:01:05,100 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:01:49,891 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:02:39,776 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:03:32,132 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:07:10,902 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 15:10:36,955 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 15:10:41,352 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
7
2023-04-28 15:14:34,270 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 15:15:15,894 - distributed.utils_perf - WARNING - full garbage collections took 16% CPU time recently (threshold: 10%)
2023-04-28 15:15:44,486 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:16:13,034 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:16:49,559 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:17:24,824 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:18:03,753 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:18:43,737 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:19:24,557 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:20:08,061 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
2023-04-28 15:20:53,711 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 15:21:41,409 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 15:22:41,104 - distributed.utils_perf - WARNING - full garbage collections took 18% CPU time recently (threshold: 10%)
2023-04-28 15:26:02,808 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
create part 1
create part 2
segmenting 1
segmenting 2
wrap results
(107627520,)
2023-04-28 15:29:26,309 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
2023-04-28 15:29:30,723 - distributed.utils_perf - WARNING - full garbage collections took 17% CPU time recently (threshold: 10%)
8
2023-04-28 15:33:25,978 - distributed.core - ERROR - Timed out during handshake while connecting to tcp://127.0.0.1:36551 after 30 s
Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
    self.run_forever()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/base_events.py", line 1868, in _run_once
    event_list = self._selector.select(timeout)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/selectors.py", line 469, in select
    fd_event_list = self._selector.poll(timeout, max_ev)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nbytes = await stream.read_bytes(fmt_size)
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/core.py", line 328, in connect
    handshake = await asyncio.wait_for(comm.read(), time_left())
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
    raise exceptions.TimeoutError() from exc
asyncio.exceptions.TimeoutError

The above exception was the direct cause of the following exception:

Traceback (most recent call last)2023-04-28 15:33:28,390 - distributed.utils_perf - WARNING - full garbage collections took 14% CPU time recently (threshold: 10%)
:
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/utils.py", line 741, in wrapper
    return await func(*args, **kwargs)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/worker.py", line 1566, in close
    await r.close_gracefully(reason=reason)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1224, in send_recv_from_rpc
    comm = await self.pool.connect(self.addr)
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1468, in connect
    return await connect_attempt
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/core.py", line 1389, in _connect
    comm = await connect(
  File "/mpc/homes/fische_r/miniconda3/lib/python3.10/site-packages/distributed/comm/core.py", line 333, in connect
    raise OSError(
OSError: Timed out during handshake while connecting to tcp://127.0.0.1:36551 after 30 s
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[55], line 29
     27     print('hit the edge (one dimension 0), ignore')
     28     continue
---> 29 part = part.compute()
     30 # try to release old unmanaged memory
     31 client.run(gc.collect)

File ~/miniconda3/lib/python3.10/site-packages/dask/base.py:314, in DaskMethodsMixin.compute(self, **kwargs)
    290 def compute(self, **kwargs):
    291     """Compute this dask collection
    292 
    293     This turns a lazy Dask collection into its in-memory equivalent.
   (...)
    312     dask.base.compute
    313     """
--> 314     (result,) = compute(self, traverse=False, **kwargs)
    315     return result

File ~/miniconda3/lib/python3.10/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    596     keys.append(x.__dask_keys__())
    597     postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
    600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/miniconda3/lib/python3.10/site-packages/distributed/client.py:3136, in Client.get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   3134         should_rejoin = False
   3135 try:
-> 3136     results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   3137 finally:
   3138     for f in futures.values():

File ~/miniconda3/lib/python3.10/site-packages/distributed/client.py:2305, in Client.gather(self, futures, errors, direct, asynchronous)
   2303 else:
   2304     local_worker = None
-> 2305 return self.sync(
   2306     self._gather,
   2307     futures,
   2308     errors=errors,
   2309     direct=direct,
   2310     local_worker=local_worker,
   2311     asynchronous=asynchronous,
   2312 )

File ~/miniconda3/lib/python3.10/site-packages/distributed/utils.py:338, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    336     return future
    337 else:
--> 338     return sync(
    339         self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    340     )

File ~/miniconda3/lib/python3.10/site-packages/distributed/utils.py:401, in sync(loop, func, callback_timeout, *args, **kwargs)
    399 else:
    400     while not e.is_set():
--> 401         wait(10)
    403 if error:
    404     typ, exc, tb = error

File ~/miniconda3/lib/python3.10/site-packages/distributed/utils.py:390, in sync.<locals>.wait(timeout)
    388 def wait(timeout):
    389     try:
--> 390         return e.wait(timeout)
    391     except KeyboardInterrupt:
    392         loop.add_callback(cancel)

File ~/miniconda3/lib/python3.10/threading.py:607, in Event.wait(self, timeout)
    605 signaled = self._flag
    606 if not signaled:
--> 607     signaled = self._cond.wait(timeout)
    608 return signaled

File ~/miniconda3/lib/python3.10/threading.py:324, in Condition.wait(self, timeout)
    322 else:
    323     if timeout > 0:
--> 324         gotit = waiter.acquire(True, timeout)
    325     else:
    326         gotit = waiter.acquire(False)

KeyboardInterrupt: 
In [58]:
# inspect the outer loop index at the point of the KeyboardInterrupt
# (presumably recorded so the full-volume segmentation can be resumed — TODO confirm)
i
Out[58]:
1
In [59]:
# inspect the inner loop index at the point of the KeyboardInterrupt
# (presumably recorded so the full-volume segmentation can be resumed — TODO confirm)
j
Out[59]:
8
In [57]:
# Quick visual sanity check of the segmentation: an x–z slice at y = 50, timestep 20.
xz_slice = segs[:, 50, :, 20]
plt.imshow(xz_slice)
Out[57]:
<matplotlib.image.AxesImage at 0x7f61a8872200>
No description has been provided for this image
In [56]:
# Persist the (partial) segmentation result so an interrupted run can be resumed.
# Use a context manager so the file handle is flushed and closed deterministically —
# the original `pickle.dump(segs, open(..., 'wb'))` left the handle open.
with open(os.path.join(training_path, 'segs.p'), 'wb') as f:
    pickle.dump(segs, f)
2023-04-28 15:34:27,314 - distributed.nanny - WARNING - Worker process still alive after 3.199999389648438 seconds, killing

Save the segmentation result to disk as a NetCDF file

In [62]:
# TODO: include further metadata in the segmented nc file
# Wrap the segmented volume in an xarray Dataset with time coordinates and
# provenance attributes (crop windows, git hashes) for traceability.
nx, ny, nz, nt = segs.shape
segdata = xr.Dataset(
    data_vars={
        'segmented': (['x', 'y', 'z', 'timestep'], segs),
        't_utc': ('timestep', t_utc),
        'time': ('timestep', time),
    },
    coords={
        'x': np.arange(nx),
        'y': np.arange(ny),
        'z': np.arange(nz),
        'timestep': np.arange(nt),
        'feature': TS.combined_feature_names,
    },
)
# carry over the source dataset's attributes, then record processing provenance
segdata.attrs = data.attrs.copy()
segdata.attrs['05_ML_cropping'] = [a, b, c, d, e, f]
segdata.attrs['pytrain_git'] = pytrain_git_sha
segdata.attrs['05_coely_gitsha'] = git_sha
segdata.attrs['GDL_crop'] = GDL_crop
In [63]:
# Output path for this sample's water-segmentation NetCDF.
# NOTE(review): there is no separator between `sample` and 'water_segmentation.nc' —
# confirm the intended filename (possibly a missing '_').
segpath = os.path.join(training_path_sample, sample+'water_segmentation.nc')
In [64]:
# write the segmentation Dataset (data, coords and provenance attrs) to NetCDF
segdata.to_netcdf(segpath)
In [75]:
# load intermediate result: reload the previously saved segmentation Dataset
# from disk (load_dataset reads it fully into memory and closes the file)
seg_data = xr.load_dataset(segpath)
In [37]:
# extract the raw segmented volume — per the Dataset repr a (x, y, z, timestep)
# uint8 array — from the reloaded Dataset
segs = seg_data['segmented'].data
In [61]:
# display the Dataset's rich repr to verify dimensions, coordinates and attributes
segdata
Out[61]:
<style>/* CSS stylesheet for displaying xarray objects in jupyterlab. * */ :root { --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1)); --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54)); --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38)); --xr-border-color: var(--jp-border-color2, #e0e0e0); --xr-disabled-color: var(--jp-layout-color3, #bdbdbd); --xr-background-color: var(--jp-layout-color0, white); --xr-background-color-row-even: var(--jp-layout-color1, white); --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); } html[theme=dark], body[data-theme=dark], body.vscode-dark { --xr-font-color0: rgba(255, 255, 255, 1); --xr-font-color2: rgba(255, 255, 255, 0.54); --xr-font-color3: rgba(255, 255, 255, 0.38); --xr-border-color: #1F1F1F; --xr-disabled-color: #515151; --xr-background-color: #111111; --xr-background-color-row-even: #111111; --xr-background-color-row-odd: #313131; } .xr-wrap { display: block !important; min-width: 300px; max-width: 700px; } .xr-text-repr-fallback { /* fallback to plain text repr when CSS is not injected (untrusted notebook) */ display: none; } .xr-header { padding-top: 6px; padding-bottom: 6px; margin-bottom: 4px; border-bottom: solid 1px var(--xr-border-color); } .xr-header > div, .xr-header > ul { display: inline; margin-top: 0; margin-bottom: 0; } .xr-obj-type, .xr-array-name { margin-left: 2px; margin-right: 10px; } .xr-obj-type { color: var(--xr-font-color2); } .xr-sections { padding-left: 0 !important; display: grid; grid-template-columns: 150px auto auto 1fr 20px 20px; } .xr-section-item { display: contents; } .xr-section-item input { display: none; } .xr-section-item input + label { color: var(--xr-disabled-color); } .xr-section-item input:enabled + label { cursor: pointer; color: var(--xr-font-color2); } .xr-section-item input:enabled + label:hover { color: var(--xr-font-color0); } .xr-section-summary { grid-column: 1; color: var(--xr-font-color2); font-weight: 500; } 
.xr-section-summary > span { display: inline-block; padding-left: 0.5em; } .xr-section-summary-in:disabled + label { color: var(--xr-font-color2); } .xr-section-summary-in + label:before { display: inline-block; content: '►'; font-size: 11px; width: 15px; text-align: center; } .xr-section-summary-in:disabled + label:before { color: var(--xr-disabled-color); } .xr-section-summary-in:checked + label:before { content: '▼'; } .xr-section-summary-in:checked + label > span { display: none; } .xr-section-summary, .xr-section-inline-details { padding-top: 4px; padding-bottom: 4px; } .xr-section-inline-details { grid-column: 2 / -1; } .xr-section-details { display: none; grid-column: 1 / -1; margin-bottom: 5px; } .xr-section-summary-in:checked ~ .xr-section-details { display: contents; } .xr-array-wrap { grid-column: 1 / -1; display: grid; grid-template-columns: 20px auto; } .xr-array-wrap > label { grid-column: 1; vertical-align: top; } .xr-preview { color: var(--xr-font-color3); } .xr-array-preview, .xr-array-data { padding: 0 5px !important; grid-column: 2; } .xr-array-data, .xr-array-in:checked ~ .xr-array-preview { display: none; } .xr-array-in:checked ~ .xr-array-data, .xr-array-preview { display: inline-block; } .xr-dim-list { display: inline-block !important; list-style: none; padding: 0 !important; margin: 0; } .xr-dim-list li { display: inline-block; padding: 0; margin: 0; } .xr-dim-list:before { content: '('; } .xr-dim-list:after { content: ')'; } .xr-dim-list li:not(:last-child):after { content: ','; padding-right: 5px; } .xr-has-index { font-weight: bold; } .xr-var-list, .xr-var-item { display: contents; } .xr-var-item > div, .xr-var-item label, .xr-var-item > .xr-var-name span { background-color: var(--xr-background-color-row-even); margin-bottom: 0; } .xr-var-item > .xr-var-name:hover span { padding-right: 5px; } .xr-var-list > li:nth-child(odd) > div, .xr-var-list > li:nth-child(odd) > label, .xr-var-list > li:nth-child(odd) > .xr-var-name span { 
background-color: var(--xr-background-color-row-odd); } .xr-var-name { grid-column: 1; } .xr-var-dims { grid-column: 2; } .xr-var-dtype { grid-column: 3; text-align: right; color: var(--xr-font-color2); } .xr-var-preview { grid-column: 4; } .xr-index-preview { grid-column: 2 / 5; color: var(--xr-font-color2); } .xr-var-name, .xr-var-dims, .xr-var-dtype, .xr-preview, .xr-attrs dt { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; padding-right: 10px; } .xr-var-name:hover, .xr-var-dims:hover, .xr-var-dtype:hover, .xr-attrs dt:hover { overflow: visible; width: auto; z-index: 1; } .xr-var-attrs, .xr-var-data, .xr-index-data { display: none; background-color: var(--xr-background-color) !important; padding-bottom: 5px !important; } .xr-var-attrs-in:checked ~ .xr-var-attrs, .xr-var-data-in:checked ~ .xr-var-data, .xr-index-data-in:checked ~ .xr-index-data { display: block; } .xr-var-data > table { float: right; } .xr-var-name span, .xr-var-data, .xr-index-name div, .xr-index-data, .xr-attrs { padding-left: 25px !important; } .xr-attrs, .xr-var-attrs, .xr-var-data, .xr-index-data { grid-column: 1 / -1; } dl.xr-attrs { padding: 0; margin: 0; display: grid; grid-template-columns: 125px auto; } .xr-attrs dt, .xr-attrs dd { padding: 0; margin: 0; float: left; padding-right: 10px; width: auto; } .xr-attrs dt { font-weight: normal; grid-column: 1; } .xr-attrs dt:hover span { display: inline-block; background: var(--xr-background-color); padding-right: 10px; } .xr-attrs dd { grid-column: 2; white-space: pre-wrap; word-break: break-all; } .xr-icon-database, .xr-icon-file-text2, .xr-no-icon { display: inline-block; vertical-align: middle; width: 1em; height: 1.5em !important; stroke-width: 0; stroke: currentColor; fill: currentColor; } </style>
<xarray.Dataset>
Dimensions:    (x: 750, y: 340, z: 1916, timestep: 31, feature: 69)
Coordinates:
  * x          (x) int64 0 1 2 3 4 5 6 7 8 ... 742 743 744 745 746 747 748 749
  * y          (y) int64 0 1 2 3 4 5 6 7 8 ... 332 333 334 335 336 337 338 339
  * z          (z) int64 0 1 2 3 4 5 6 7 ... 1909 1910 1911 1912 1913 1914 1915
  * timestep   (timestep) int64 0 1 2 3 4 5 6 7 8 ... 22 23 24 25 26 27 28 29 30
  * feature    (feature) <U27 'diff_to_first_' ... 'full_temp_min_Gauss_2.0'
Data variables:
    segmented  (x, y, z, timestep) uint8 0 0 0 2 2 2 0 0 0 ... 0 0 0 0 0 0 0 0 0
    t_utc      (timestep) float64 1.667e+09 1.667e+09 ... 1.667e+09 1.667e+09
    time       (timestep) float64 7.036 6.218 66.42 126.6 ... 759.3 819.5 879.7
Attributes: (12/20)
    name:                                              1
    voxel size:                                        2.75 um
    voxel:                                             2.75e-06
    post rotation cropping coordinates [a:b,c:d,e:f]:  [ 120 1062  320  972  ...
    rotation angle 1:                                  -22
    rotation angle 2:                                  -22
    ...                                                ...
    05_crop_githash:                                   761a49fa1ea416a28344b9...
    git_sha_registration:                              612e92b
    githash_registration:                              612e92b28895745f6b422c...
    05_ML_cropping:                                    [120, 870, 10, 350, 50...
    pytrain_git:                                       e5b2d83
    05_coely_gitsha:                                   1c033ce
xarray.Dataset
    • x: 750
    • y: 340
    • z: 1916
    • timestep: 31
    • feature: 69
    • x
      (x)
      int64
      0 1 2 3 4 5 ... 745 746 747 748 749
      array([  0,   1,   2, ..., 747, 748, 749])
    • y
      (y)
      int64
      0 1 2 3 4 5 ... 335 336 337 338 339
      array([  0,   1,   2, ..., 337, 338, 339])
    • z
      (z)
      int64
      0 1 2 3 4 ... 1912 1913 1914 1915
      array([   0,    1,    2, ..., 1913, 1914, 1915])
    • timestep
      (timestep)
      int64
      0 1 2 3 4 5 6 ... 25 26 27 28 29 30
      array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
             18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
    • feature
      (feature)
      <U27
      'diff_to_first_' ... 'full_temp_...
      array(['diff_to_first_', 'diff_to_last_', 'Gaussian_4D_Blur_0.0',
             'Gaussian_4D_Blur_2.0', 'Gaussian_4D_Blur_4.0',
             'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_4.0_0.0',
             'diff_of_gauss_4D_4.0_2.0', 'Gradient_sigma_0.0_0',
             'Gradient_sigma_0.0_1', 'Gradient_sigma_0.0_2', 'Gradient_sigma_0.0_3',
             'hessian_sigma_0.0_00', 'hessian_sigma_0.0_01', 'hessian_sigma_0.0_02',
             'hessian_sigma_0.0_03', 'hessian_sigma_0.0_11', 'hessian_sigma_0.0_12',
             'hessian_sigma_0.0_13', 'hessian_sigma_0.0_22', 'hessian_sigma_0.0_23',
             'hessian_sigma_0.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1',
             'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00',
             'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03',
             'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13',
             'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33',
             'Gradient_sigma_4.0_0', 'Gradient_sigma_4.0_1', 'Gradient_sigma_4.0_2',
             'Gradient_sigma_4.0_3', 'hessian_sigma_4.0_00', 'hessian_sigma_4.0_01',
             'hessian_sigma_4.0_02', 'hessian_sigma_4.0_03', 'hessian_sigma_4.0_11',
             'hessian_sigma_4.0_12', 'hessian_sigma_4.0_13', 'hessian_sigma_4.0_22',
             'hessian_sigma_4.0_23', 'hessian_sigma_4.0_33', 'Gaussian_time_0.0',
             'Gaussian_time_2.0', 'Gaussian_time_4.0', 'diff_of_gauss_time_2.0_0.0',
             'diff_of_gauss_time_4.0_0.0', 'diff_of_gauss_time_4.0_2.0',
             'Gaussian_space_0.0', 'Gaussian_space_2.0', 'Gaussian_space_4.0',
             'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_4.0_0.0',
             'diff_of_gauss_space_4.0_2.0', 'diff_to_min_',
             'diff_temp_min_Gauss_2.0', 'first_', 'last_', 'full_temp_mean_',
             'full_temp_min_', 'full_temp_min_Gauss_2.0'], dtype='<U27')
    • segmented
      (x, y, z, timestep)
      uint8
      0 0 0 2 2 2 0 0 ... 0 0 0 0 0 0 0 0
      array([[[[0, 0, 0, ..., 1, 1, 1],
               [0, 0, 0, ..., 1, 1, 1],
               [0, 0, 0, ..., 1, 1, 1],
               ...,
               [2, 2, 2, ..., 1, 1, 1],
               [2, 2, 2, ..., 1, 1, 1],
               [2, 2, 0, ..., 1, 1, 1]],
      
              [[0, 0, 0, ..., 1, 1, 1],
               [0, 0, 0, ..., 1, 1, 1],
               [0, 0, 0, ..., 1, 1, 1],
               ...,
               [2, 2, 2, ..., 1, 1, 1],
               [2, 2, 2, ..., 1, 1, 1],
               [2, 2, 0, ..., 1, 1, 1]],
      
              [[2, 2, 0, ..., 1, 1, 1],
               [2, 2, 0, ..., 1, 1, 1],
               [2, 2, 0, ..., 1, 1, 1],
               ...,
      ...
               ...,
               [0, 0, 0, ..., 2, 0, 0],
               [0, 0, 0, ..., 2, 0, 0],
               [0, 0, 0, ..., 0, 0, 0]],
      
              [[0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0],
               ...,
               [0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0]],
      
              [[0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0],
               [0, 0, 0, ..., 0, 0, 0],
               ...,
               [0, 0, 0, ..., 0, 0, 0],
               [0, 2, 0, ..., 0, 0, 0],
               [2, 0, 0, ..., 0, 0, 0]]]], dtype=uint8)
    • t_utc
      (timestep)
      float64
      1.667e+09 1.667e+09 ... 1.667e+09
      array([1.66722001e+09, 1.66722040e+09, 1.66722046e+09, 1.66722052e+09,
             1.66722058e+09, 1.66722064e+09, 1.66722070e+09, 1.66722076e+09,
             1.66722082e+09, 1.66722088e+09, 1.66722095e+09, 1.66722101e+09,
             1.66722107e+09, 1.66722113e+09, 1.66722119e+09, 1.66722125e+09,
             1.66722142e+09, 1.66722148e+09, 1.66722154e+09, 1.66722160e+09,
             1.66722166e+09, 1.66722172e+09, 1.66722178e+09, 1.66722184e+09,
             1.66722190e+09, 1.66722196e+09, 1.66722202e+09, 1.66722209e+09,
             1.66722215e+09, 1.66722221e+09, 1.66722227e+09])
    • time
      (timestep)
      float64
      7.036 6.218 66.42 ... 819.5 879.7
      array([  7.0355083,   6.2176226,  66.424704 , 126.6437905, 186.8748772,
             247.0959636, 307.2980496, 367.5261356, 427.75722  , 487.9903053,
             548.1683904, 608.3904758, 668.6175608, 728.8406441, 789.0717297,
             849.2918141,  36.6449636,  96.8650426, 157.0971262, 217.3022106,
             277.5122954, 337.7453791, 397.9744636, 458.1985478, 518.4136323,
             578.6417168, 638.8698003, 699.1018843, 759.3249683, 819.5310523,
             879.7401363])
    • x
      PandasIndex
      PandasIndex(Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
                  ...
                  740, 741, 742, 743, 744, 745, 746, 747, 748, 749],
                 dtype='int64', name='x', length=750))
    • y
      PandasIndex
      PandasIndex(Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
                  ...
                  330, 331, 332, 333, 334, 335, 336, 337, 338, 339],
                 dtype='int64', name='y', length=340))
    • z
      PandasIndex
      PandasIndex(Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
                  ...
                  1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915],
                 dtype='int64', name='z', length=1916))
    • timestep
      PandasIndex
      PandasIndex(Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
                 dtype='int64', name='timestep'))
    • feature
      PandasIndex
      PandasIndex(Index(['diff_to_first_', 'diff_to_last_', 'Gaussian_4D_Blur_0.0',
             'Gaussian_4D_Blur_2.0', 'Gaussian_4D_Blur_4.0',
             'diff_of_gauss_4D_2.0_0.0', 'diff_of_gauss_4D_4.0_0.0',
             'diff_of_gauss_4D_4.0_2.0', 'Gradient_sigma_0.0_0',
             'Gradient_sigma_0.0_1', 'Gradient_sigma_0.0_2', 'Gradient_sigma_0.0_3',
             'hessian_sigma_0.0_00', 'hessian_sigma_0.0_01', 'hessian_sigma_0.0_02',
             'hessian_sigma_0.0_03', 'hessian_sigma_0.0_11', 'hessian_sigma_0.0_12',
             'hessian_sigma_0.0_13', 'hessian_sigma_0.0_22', 'hessian_sigma_0.0_23',
             'hessian_sigma_0.0_33', 'Gradient_sigma_2.0_0', 'Gradient_sigma_2.0_1',
             'Gradient_sigma_2.0_2', 'Gradient_sigma_2.0_3', 'hessian_sigma_2.0_00',
             'hessian_sigma_2.0_01', 'hessian_sigma_2.0_02', 'hessian_sigma_2.0_03',
             'hessian_sigma_2.0_11', 'hessian_sigma_2.0_12', 'hessian_sigma_2.0_13',
             'hessian_sigma_2.0_22', 'hessian_sigma_2.0_23', 'hessian_sigma_2.0_33',
             'Gradient_sigma_4.0_0', 'Gradient_sigma_4.0_1', 'Gradient_sigma_4.0_2',
             'Gradient_sigma_4.0_3', 'hessian_sigma_4.0_00', 'hessian_sigma_4.0_01',
             'hessian_sigma_4.0_02', 'hessian_sigma_4.0_03', 'hessian_sigma_4.0_11',
             'hessian_sigma_4.0_12', 'hessian_sigma_4.0_13', 'hessian_sigma_4.0_22',
             'hessian_sigma_4.0_23', 'hessian_sigma_4.0_33', 'Gaussian_time_0.0',
             'Gaussian_time_2.0', 'Gaussian_time_4.0', 'diff_of_gauss_time_2.0_0.0',
             'diff_of_gauss_time_4.0_0.0', 'diff_of_gauss_time_4.0_2.0',
             'Gaussian_space_0.0', 'Gaussian_space_2.0', 'Gaussian_space_4.0',
             'diff_of_gauss_space_2.0_0.0', 'diff_of_gauss_space_4.0_0.0',
             'diff_of_gauss_space_4.0_2.0', 'diff_to_min_',
             'diff_temp_min_Gauss_2.0', 'first_', 'last_', 'full_temp_mean_',
             'full_temp_min_', 'full_temp_min_Gauss_2.0'],
            dtype='object', name='feature'))
  • name :
    1
    voxel size :
    2.75 um
    voxel :
    2.75e-06
    post rotation cropping coordinates [a:b,c:d,e:f] :
    [ 120 1062 320 972 0 2016]
    rotation angle 1 :
    -22
    rotation angle 2 :
    -22
    git_sha_rotation :
    2e4dec6
    githash_rotation :
    2e4dec6a6358ec0b6c95cac935d9648b716189a9
    image_data_names :
    <scan>_iamge_data_<time_step>, e.g. 02_image_data_00 is the first time step of scan 02
    03_crop_git_sha :
    761a49f
    03_crop_githash :
    761a49fa1ea416a28344b9f2e885a2e83f94996c
    04_crop_git_sha :
    761a49f
    04_crop_githash :
    761a49fa1ea416a28344b9f2e885a2e83f94996c
    05_crop_git_sha :
    761a49f
    05_crop_githash :
    761a49fa1ea416a28344b9f2e885a2e83f94996c
    git_sha_registration :
    612e92b
    githash_registration :
    612e92b28895745f6b422c556fb8aa7ad376baeb
    05_ML_cropping :
    [120, 870, 10, 350, 50, -50]
    pytrain_git :
    e5b2d83
    05_coely_gitsha :
    1c033ce
In [ ]: