Files
coely_tomcat_processing/06a_cathalyst_segmentation.ipynb

26 KiB

Segmentation of CL at anode to study "crumbling"

In [1]:
import xarray as xr
import os
import socket
import numpy as np
import matplotlib.pyplot as plt
import subprocess
from skimage import filters
import dask
import dask.array
from dask.distributed import Client, LocalCluster

# Host-specific paths: repo location, NAS data root, fast SSD scratch, and a
# worker memory limit tuned per machine.
host = socket.gethostname()
if host == 'mpc2959.psi.ch':
    gitpath = '/mpc/homes/fische_r/lib/co2ely-tomcat'
    toppath = '/mpc/homes/fische_r/NAS/DASCOELY'
    toppathSSD = '/mnt/SSD/fische_r/COELY'
    temppath = '/mnt/SSD/fische_r/tmp'
    temppath_2 = '/mpc/homes/fische_r/NAS/tmp'
    training_path = '/mpc/homes/fische_r/NAS/DASCOELY/processing/05_water_GDL_ML/'
    memlim = '750GB'
elif host == 'mpc2053.psi.ch':
    gitpath = '/mpc/homes/fische_r/lib/co2ely-tomcat'
    toppath = '/mpc/homes/fische_r/NAS/DASCOELY'
    toppathSSD = os.path.join(toppath, 'processing')
    temppath = '/mnt/SSD_2TB_nvme0n1/Robert/tmp/'
    temppath_2 = '/mpc/homes/fische_r/NAS/tmp'
    training_path = '/mpc/homes/fische_r/NAS/DASCOELY/processing/05_membrane_ML/'
    memlim = '350GB'
else:
    print('host '+host+' currently not supported')

path_02_4D = os.path.join(toppathSSD, '02_registered_3p1D') #h5 with registered data
if not host=='mpc2959.psi.ch':
    path_02_4D = os.path.join(toppath, 'processing','02_registered_3p1D') #h5 with registered data

# Fetch the git hash for provenance. Run git inside the repo via the cwd=
# argument instead of os.chdir so the notebook's working directory is never
# left changed if the subprocess call raises.
git_sha = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'], cwd=gitpath).decode().strip()
githash = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=gitpath).decode().strip()
In [2]:
# Point dask's spill-to-disk directory at the fast local SSD.
tempfolder = temppath  #a big SSD is a major advantage to allow spill to disk and still be efficient. large dataset might crash with too small SSD or be slow with normal HDD
# tempfolder = temppath_2
dask.config.config['temporary-directory'] = tempfolder
# dask.config.config['distributed']['worker']['memory']['recent-to-old-time'] = '200000s'

# here you have the option to use a virtual cluster or even slurm on ra (not attempted yet)
# cluster = LocalCluster(dashboard_address=':35000', memory_limit = memlim, n_workers=1) #settings optimised for mpc2959 and mpc2953, play around if needed, if you know nothing else is using RAM then you can almost go to the limit
# maybe less workers with more threads makes better use of shared memory 

# scheduler_port = 'tcp://129.129.188.222:8786' #<-- if scheduler on mpc2959; scheduler on mpc2053 -> 'tcp://129.129.188.248:8786'
# cluster = scheduler_port

# client = Client(cluster)
# client.amm.start()
# print('Dashboard at '+client.dashboard_link)

Data preparation

Use ImageJ to define ROI in registered 4D-Data including Anode CL+GDL and save in dict

In [3]:
# Scratch cell: quick check of a candidate anode ROI (start, end)
# before entering it in the anode_ROI dict below.
a = 352
b = a + 67
print(a, b)
352 419
In [4]:
# Anode CL+GDL ROI per sample: (start, end) slice bounds along axis 1 of the
# registered volumes, picked manually in ImageJ.
# NOTE(review): entry '4' is (342, 352+102) while every other offset-style
# entry repeats the same base (e.g. 328, 328+100) — 352 looks like a typo
# for 342; confirm against the ImageJ ROI before trusting sample '4'.
anode_ROI = {
    '5II': (332, 451),
    '1': (313,380),
    '3III': (344, 344+105),
    '3II': (321, 321+106),
    '4II': (323, 323+104),
    '4': (342, 352+102),
    '5': (328, 328+100),
    '6': (344, 344+102),
    '7x': (328, 328+110),
    '8x': (338, 338+108)

}
In [5]:
# Grey-value threshold for segmenting the catalyst layer; anything brighter
# than thmax (= 3*th) is treated as a registration artifact and excluded.
th = 15000
thmax = th * 3

Sample processing

load ROI of selected sample

In [6]:
# Select a sample by its position in the ROI dict.
i = 1
# sample = '8x'
sample = list(anode_ROI)[i]
print(sample)
1
In [7]:
sample = '8x'  # NOTE(review): hardcoded override of the selection in the previous cell — hidden-state hazard on re-run
In [8]:
# Open the registered 4D dataset for this sample and collect its image
# variables (keys whose characters 3:7 read 'imag'), sorted by name.
file = '02_' + sample + '_registered_3p1D.nc'
imagepath = os.path.join(path_02_4D, file)
data = xr.open_dataset(imagepath)
images = sorted(k for k in data.keys() if k[3:7] == 'imag')
In [9]:
# Shape of one cropped volume plus a trailing time axis (one slot per image).
a, b = anode_ROI[sample]
shp = (*data[images[4]][:, a:b, :].shape, len(images))
In [10]:
shp  # display the (x, y, z, time) shape of the cropped stack
Out[10]:
(750, 108, 2016, 53)
In [11]:
# NOTE(review): this close happens BEFORE the loading loop below reads
# data[images[i]] — with xarray's lazy loading that read may fail or silently
# reopen. The duplicated In[10]/In[11] execution counts suggest out-of-order
# execution; move this close after the loading loop for Restart-&-Run-All safety.
data.close()
In [10]:
# Preallocate the 4D stack (x, y, z, time) as uint16 to match the raw data.
im = np.zeros(shp, dtype = np.uint16)
In [11]:
# Copy each time step's cropped volume into the preallocated array.
for i in range(shp[-1]):
    if i%10==0:
        print(i)  # coarse progress indicator every 10 time steps
    im[...,i] = data[images[i]][ :,a:b, :].data

data.close()
0
10
20
30
40
50
In [12]:
# Wrap the in-memory stack as a dask array so the blur can run chunk-wise.
da = dask.array.from_array(im, chunks='auto')
In [13]:
da  # inspect the resulting chunk layout
Out[13]:
Array Chunk
Bytes 16.12 GiB 127.34 MiB
Shape (750, 108, 2016, 53) (108, 108, 108, 53)
Dask graph 133 chunks in 1 graph layer
Data type uint16 numpy.ndarray
750 1 53 2016 108

3D Gaussian Blur sigma=1

In [14]:
def Gaussian_Blur_space(da, sigma):
    """Apply a Gaussian blur over the spatial axes of a dask array,
    leaving the last (time) axis untouched.

    Parameters
    ----------
    da : dask.array.Array
        4D stack (x, y, z, time); blurring acts on all axes except the last.
    sigma : float
        Standard deviation of the Gaussian kernel, in voxels.

    Returns
    -------
    dask.array.Array
        Lazily blurred array (computed only on .compute()).
    """
    # Overlap depth 1 + 4*sigma covers the effective kernel support, clipped
    # to the array shape. Cast to int: dask's map_overlap expects integer
    # depths, but the original arithmetic produced numpy floats.
    depth = np.minimum(np.full(da.ndim, 1 + 4 * sigma), da.shape).astype(int)
    depth[-1] = 0  # no overlap needed along the time axis
    sigmas = np.full(da.ndim, float(sigma))
    sigmas[-1] = 0  # sigma=0 on the last axis -> no smoothing across time steps
    return da.map_overlap(filters.gaussian, depth=tuple(depth),
                          boundary='nearest', sigma=sigmas,
                          preserve_range=True)
In [15]:
# Zero out voxels brighter than thmax (registration artifacts) before
# blurring, so they do not bleed into neighboring voxels.
da[da>thmax] = 0
In [16]:
da = Gaussian_Blur_space(da, 1)  # lazy 3D Gaussian blur, sigma = 1 voxel
In [17]:
# Materialize the blurred stack. NOTE(review): skimage's gaussian filter
# presumably returns a float array even with preserve_range=True, so `im`
# likely changes dtype from uint16 to float here — reusing the name hides
# that; confirm downstream thresholding expects it.
im = da.compute()

segment data

In [19]:
# Threshold the blurred data. Voxels above thmax were already zeroed before
# blurring, so the upper bound is no longer needed here. Cast to uint8 for
# consistency with the cathode segmentation below (the original kept a bool
# array) and for compact netCDF storage.
segs = (im > th).astype(np.uint8)

save to netcdf4 to be fed in Blender pipeline

In [20]:
# Output file for the segmented anode CL on the NAS.
outpath = os.path.join(toppath, 'processing', '06_anode_CL',
                       '06_' + sample + '_anode_CL_segmented_filtered.nc')
In [21]:
# Wrap the segmentation in an xarray Dataset with simple index coordinates
# and provenance attributes (sample name, ROI crop bounds).
shp = segs.shape
dims = ['x', 'y', 'z', 'time']
segdata = xr.Dataset(
    {'segmented': (dims, segs)},
    coords={d: np.arange(n) for d, n in zip(dims, shp)},
    attrs={'name': sample, 'cropping': [a, b]},
)
In [22]:
segdata.to_netcdf(outpath)  # write segmentation for the Blender pipeline
In [23]:
outpath  # display where the anode segmentation was written
Out[23]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/06_anode_CL/06_8x_anode_CL_segmented_filtered.nc'

repeat for cathode as reference

In [20]:
# Scratch cell: quick check of a candidate cathode ROI (start, end)
# before entering it in the cathode_ROI dict below.
a = 284
b = a + 55
print(a, b)
284 339
In [21]:
# Cathode CL ROI per sample: (start, end) slice bounds along axis 1 of the
# registered volumes, picked manually in ImageJ.
cathode_ROI = {
    '5II': (262,338),
    '1': (284,339),
    '3III': (309, 309+39),
    '3II': (277, 277+43),
    '4II': (281, 281+44),
    '4': (306, 306+46),
    '5': (284, 284+46),
    '6': (304, 304+46),
    '7x': (284, 284+52),
    '8x': (288, 288+52)
    
}
In [22]:
# Same thresholds as for the anode (redundant re-definition, kept so this
# section can run standalone).
th = 15000
thmax = 3*th #to exclude registration artifacts

load ROI of selected sample

In [23]:
# Re-open the registered 4D dataset and collect its image variables
# (keys whose characters 3:7 read 'imag'), sorted by name.
file = '02_' + sample + '_registered_3p1D.nc'
imagepath = os.path.join(path_02_4D, file)
data = xr.open_dataset(imagepath)
images = sorted(k for k in data.keys() if k[3:7] == 'imag')
In [24]:
# Shape of one cropped cathode volume plus a trailing time axis.
a, b = cathode_ROI[sample]
shp = (*data[images[4]][:, a:b, :].shape, len(images))
In [25]:
# Preallocate the 4D stack (x, y, z, time) as uint16 to match the raw data.
im = np.zeros(shp, dtype = np.uint16)
In [26]:
# Copy each time step's cropped volume into the preallocated array
# (here, unlike the anode section, the dataset is closed only after the loop).
for i in range(shp[-1]):
    if i%10==0:
        print(i)  # coarse progress indicator every 10 time steps
    im[...,i] = data[images[i]][ :,a:b, :].data
0
10
20
30
40
50
60
70
80
In [27]:
data.close()  # release the netCDF file handle now that loading is done

segment data

In [28]:
# Segment: keep voxels with grey value in (th, thmax) — the upper bound
# excludes registration artifacts — stored compactly as uint8.
segs = ((im > th) & (im < thmax)).astype(np.uint8)

save to netcdf4 to be fed in Blender pipeline

In [29]:
# Output file for the segmented cathode CL on the NAS.
outpath = os.path.join(toppath, 'processing', '06b_cathode_CL',
                       '06b_' + sample + '_cathode_CL_segmented.nc')
In [30]:
# Wrap the cathode segmentation in an xarray Dataset with simple index
# coordinates and provenance attributes (sample name, ROI crop bounds).
shp = segs.shape
dims = ['x', 'y', 'z', 'time']
segdata = xr.Dataset(
    {'segmented': (dims, segs)},
    coords={d: np.arange(n) for d, n in zip(dims, shp)},
    attrs={'name': sample, 'cropping': [a, b]},
)
In [31]:
segdata.to_netcdf(outpath)  # write segmentation for the Blender pipeline
In [150]:
outpath  # display where the cathode segmentation was written
Out[150]:
'/mpc/homes/fische_r/NAS/DASCOELY/processing/06b_cathode_CL/06b_8x_cathode_CL_segmented.nc'
In [ ]:
 
In [ ]:
 
In [ ]: