Files
pyTrainSeg/test.ipynb
T
2022-08-05 13:43:51 +02:00

1.5 MiB

Test with real image data and multiple filters

In [1]:
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import filters
from skimage import feature
from skimage.morphology import disk,ball
from sklearn.ensemble import RandomForestClassifier
from scipy import ndimage
import os
import imageio
import sys
In [2]:
im = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_im.tif")
plt.imshow(im)
# "U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif"
Out[2]:
<matplotlib.image.AxesImage at 0x21a7fc3b1f0>
No description has been provided for this image

load label images iteratively optimized in trainable weka segmentation

In [3]:
air = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\air_truth.tif")>0
water = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif")>0
fiber = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\fiber_truth.tif")>0
truth = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_truth.tif")

develop feature Stack creation as in trainable weka segmentaion (TWS)

TWS creates Gaussian Blurs for sigma=0,1,2,4,8,16,... (whatever the limit is)

on each sigma, a sobel and hessian filter is applied

for hessian one image each for hessian, hessian trace, hessian determinant, hessian eigenvalue 1 & 2, hessian orientation, hessian square eigenvalue difference, normalized eigenvalue difference

difference of gaussians for all: larger sigma - smaller

Filters in TWS

In [155]:
def TWS_gaussian(im, sig=0):
    G = filters.gaussian(im, sigma=sig, mode='reflect') #, preserve_range=True
    fullname = ''.join(['gaussian_',f'{sig:.1f}'])
    return G, fullname

def TWS_gaussian_stack(im, sigmas):
    fullnames = []
    gstack = np.zeros((im.shape[0],im.shape[1], len(sigmas)))
    for sig,i in zip(sigmas, range(len(sigmas))):
        # if np.abs(sig-0)<0.1:
        #     gstack[:,:,i] = im
        #     name = ''.join(['gaussian_',f'{0:.1f}'])
        # else:
        gstack[:,:,i], name = TWS_gaussian(im, sig)
        fullnames.append(name)
    return gstack, fullnames

def TWS_sobel(im, sig):
    #sigma is only passed to the name! make sure it's correct
    S = filters.sobel(im, mode='reflect')
    name = ''.join(['sobel_',f'{sig:.1f}'])
    return S, name

def TWS_sobel_stack(gstack, sigmas):
    Sstack = np.zeros(gstack.shape)
    fullnames = []
    for i in range(len(sigmas)):
        Sstack[:,:,i], name = TWS_sobel(gstack[:,:,i], sigmas[i])
        fullnames.append(name)
    return Sstack, fullnames

def TWS_hessian(im, sig):
    #creates 8 images per sigma
    #sigma is only passed to the name! make sure it's correct
    a, b, d =  feature.hessian_matrix(im, mode='reflect')
    c = b
    
    mod = np.sqrt(a**2+b*c+d**2)
    trace = a+d
    det = a*d-c*b
    eig1 = (a+d)/2 + np.sqrt((4*b**2+(a-d)**2)/2)
    eig2 = (a+d)/2 - np.sqrt((4*b**2+(a-d)**2)/2)
    
    gamma_norm_eig_diff = (a-d)**2*((a-d)**2+4*b**2)
    square_norm_eig_diff = ((a-d)**2+4*b**2) 
    
    orient = 0.5*np.arccos(4*b**2+(a-d)**2)
    hessian_stack = np.dstack([mod,trace,det,eig1,eig2,orient,gamma_norm_eig_diff,square_norm_eig_diff])
    names = ['module', 'trace', 'determinant', 'eigenvalue1', 'eigenvalue2', 'orientation', 'gamma_norm_eig_diff', 'square_norm_eig_diff']

    fullnames = []
    for name in names:
        fullname = ''.join(['hessian_',name,'_',f'{sig:.1f}'])
        fullnames.append(fullname)
    return hessian_stack, fullnames

def TWS_hessian_stack(gstack, sigmas):
    size = len(sigmas)*8
    Hstack = np.zeros((gstack.shape[0],gstack.shape[1], size))
    fullnames = []
    for i in range(len(sigmas)):
        Hstack[:,:,i*8:i*8+8], names = TWS_hessian(gstack[:,:,i],sigmas[i])
        fullnames = fullnames + names
    return Hstack, fullnames
        

def TWS_diff_of_gaussians(gstack, sigmas):
    #creates a stack of {size} (see below)
    n = len(sigmas)
    size = int(n*(n-1)/2)
    
    diff_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    cc = 0
    for i in range(1,n):
        for j in range(i):
            DG = gstack[:,:,i]-gstack[:,:,j]
            diff_stack[:,:,cc] = DG
            name = ''.join(['diff_of_gauss_',f'{sigmas[i]:.1f}','_',f'{sigmas[j]:.1f}'])
            fullnames.append(name)
            cc = cc + 1
    return diff_stack, fullnames
            
            
def TWS_minimum(im, sigma):
    M = filters.rank.minimum(im, disk(sigma))
    fullname = ''.join(['minimum_',f'{sigma:.1f}'])
    return M, fullname

def TWS_minimum_stack(im, sigmas):
    size = len(sigmas)-1
    min_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        min_stack[:,:,i], fullname = TWS_minimum(im, sig)
        fullnames.append(fullname)
    return min_stack, fullnames

def TWS_maximum(im, sigma):
    M = filters.rank.maximum(im, disk(sigma))
    fullname = ''.join(['maximum_',f'{sigma:.1f}'])
    return M, fullname

def TWS_maximum_stack(im, sigmas):
    size = len(sigmas)-1
    max_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        max_stack[:,:,i], fullname = TWS_maximum(im, sig)
        fullnames.append(fullname)
    return max_stack, fullnames

def TWS_median(im, sigma):
    M = filters.rank.median(im, disk(sigma))
    fullname = ''.join(['median_',f'{sigma:.1f}'])
    return M, fullname

def TWS_median_stack(im, sigmas):
    size = len(sigmas)-1
    med_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        med_stack[:,:,i], fullname = TWS_median(im, sig)
        fullnames.append(fullname)
    return med_stack, fullnames
    

reverse engineered feature stack

In [3]:
def TWS_feature_stack(im, sigmas, feat_select):
        feat_names = []
        stack_list = []
    #TODO: allow ticking off features
    
        #gaussian filters
        if feat_select['Gaussian']:
            g_stack, gfeat = TWS_gaussian_stack(im, sigmas)
            stack_list.append(g_stack)
            feat_names = feat_names + gfeat
            
        #sobel filter on every gaussian sigma
        if feat_select['Sobel']:
            s_stack, sfeat = TWS_sobel_stack(g_stack, sigmas)
            stack_list.append(s_stack)
            feat_names = feat_names + sfeat
            
        #stack of hessian stacks for every sigma
        if feat_select['Hessian']:
            h_stack, hfeat = TWS_hessian_stack(g_stack, sigmas)
            stack_list.append(h_stack)
            feat_names = feat_names + hfeat
            
        #diff of gaussians
        if feat_select['Diff of Gaussians']:
            d_stack, dfeat = TWS_diff_of_gaussians(g_stack, sigmas)
            stack_list.append(d_stack)
            feat_names = feat_names + dfeat
        
        #minimum filters
        if feat_select['minimum']:
            min_stack, minfeat = TWS_minimum_stack(im, sigmas)
            stack_list.append(min_stack)
            feat_names = feat_names + minfeat

        #maximum filters
        if feat_select['maximum']:
            max_stack, maxfeat = TWS_maximum_stack(im, sigmas)
            stack_list.append(max_stack)
            feat_names = feat_names + maxfeat
        
        #median filters
        if feat_select['median']:
            med_stack, medfeat = TWS_median_stack(im, sigmas)
            stack_list.append(med_stack)
            feat_names = feat_names + medfeat
            
        feat_stack = np.dstack(stack_list)
                        
        return feat_stack, feat_names
        
        
        
    

Function to classify one slice automatically detecting phases in truth image

In [4]:
def label_data_slice(im, truth, sigmas, feat_select, feat_stack=None):
    #TODO: automatically detect phases in truth image and aovid overlap
    #TODO: define format of truth image
    phase1 = truth==1
    phase2 = truth==2
    phase3 = truth==4
    
    if feat_stack is None:
        feat_stack, feat_names = TWS_feature_stack(im, sigmas, feat_select)
    
    X1 = feat_stack[phase1]
    y1 = np.zeros(X1.shape[0])
    X2 = feat_stack[phase2]
    y2 = np.ones(X2.shape[0])
    X3 = feat_stack[phase3]
    y3 = 2*np.ones(X3.shape[0])

    y = np.concatenate([y1,y2,y3])
    X = np.concatenate([X1,X2,X3])
    
    return X,y, feat_stack
In [5]:
 def classify_and_plot(X,y,im, feat_stack, plot=True):
    # TODO: allow choice and manipulation of ML method
    clf =  RandomForestClassifier(n_estimators = 300, n_jobs=-1, random_state = 42, max_features=None) 
    clf.fit(X, y)
    num_feat = feat_stack.shape[2]
    ypred = clf.predict(feat_stack.reshape(-1,num_feat))
    result = ypred.reshape(im.shape).astype(np.uint8)
    if plot:
        fig, (ax1, ax2)= plt.subplots(1,2,figsize=(12,7))
        ax1.imshow(im, cmap='Greys_r')
        ax2.imshow(result)
    return result, clf
In [6]:
def slicewise_classify_for_training(im, slice_name,sigmas, feat_select, plot=True,  feat_stack=None, truth=None, training_dict=None): #, training_path, XTM_data_path
    #consider training data from other slices but do not simpliy append to avoid duplicates
    flag = False #TODO: get rid of flag
    if training_dict is not None:
        slices = list(training_dict.keys())
        if slice_name in slices: 
            slices.remove(slice_name)
        if len(slices)>0:
            flag = True
            Xall = training_dict[slices[0]][0]
            yall = training_dict[slices[0]][1]
            for i in range(1,len(slices)):
                Xall = np.concatenate([Xall, training_dict[slices[i]][0]])
                yall = np.concatenate([yall, training_dict[slices[i]][1]])
    
    
    if feat_stack is None:
        print('creating feature stack')
        X,y, feat_stack =  label_data_slice(im, truth, sigmas, feat_select)
    else:
        X,y, feat_stack =  label_data_slice(im, truth, sigmas, feat_select, feat_stack=feat_stack) 
    
    print('training and classifying')
    
    if training_dict is not None and flag:
        Xt = np.concatenate([Xall,X])
        yt = np.concatenate([yall,y])
        Xall = None
        yall = None
    else:
        Xt = X
        yt = y
    
    result, clf = classify_and_plot(Xt,yt,im, feat_stack, plot)
    
    # print('save slice result, retrain if needed')
    # imageio.imsave(os.path.join(training_path,''.join([slice_name,'_classified.tif'])), result)
    return X, y, feat_stack, clf, result

end of definitions

train classifier and plot result on "test" slice

In [9]:
sigmas = [0, 2,4,6,8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True
              }
    
In [10]:
slice_name = 'test'
training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat"
In [10]:
im = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_im.tif")
air = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\air_truth.tif")>0
water = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif")>0
fiber = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\fiber_truth.tif")>0
truth = air+water*2+fiber*4
In [12]:
X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, truth=truth,training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat", XTM_data_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat")
creating feature stack
training and classifying
save slice result
No description has been provided for this image
In [13]:
training_dict = {}
training_dict[slice_name] = (X,y, feat_stack)

proper training workflow for 3 phases air, water, fiber TODO: make general

In [14]:
XTM_data_path = r"D:\TOMCAT_2\01_intcorrect_med_leg_0"
training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\training"


time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[0]
images_first = os.listdir(os.path.join(XTM_data_path, timestep_folder))
In [37]:
# randomly suggest slice for training
num_ts = len(time_folder)
num_slices = len(images_first)

ts = np.random.choice(range(num_ts))+1
print('try time step ',ts )
sn = np.random.choice(range(num_slices))+1
print('try slice number ', sn)

slice_name= ''.join(['ts_',str(ts),'_slice_',str(sn)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
if not os.path.exists(waterpath):
    print('create missing training set with ImageJ-script!')
try time step  9
try slice number  326
create missing training set with ImageJ-script!
In [27]:
time_step = 15
slice_number = 519

time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[time_step]
images = os.listdir(os.path.join(XTM_data_path, timestep_folder))
image_name = images[slice_number]
im = io.imread(os.path.join(XTM_data_path, timestep_folder, image_name))

slice_name= ''.join(['ts_',str(time_step),'_slice_',str(slice_number)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
airname = ''.join([slice_name, '_air.tif'])
airpath = os.path.join(training_path, airname)
fibername = ''.join([slice_name, '_fiber.tif'])
fiberpath = os.path.join(training_path, fibername)

air = io.imread(airpath)>0
water = io.imread(waterpath)>0
fiber = io.imread(fiberpath)>0
truth = air+water*2+fiber*4

if slice_name in training_dict.keys():
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, XTM_data_path=XTM_data_path, training_path=training_path, feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, XTM_data_path=XTM_data_path, training_path=training_path, truth=truth, training_dict=training_dict)

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')
training and classifying
save slice result
No description has been provided for this image
In [31]:
### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas)
In [32]:
plt.figure( figsize=(16,9))
plt.plot(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)
Out[32]:
([0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59],
 [Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, '')])
No description has been provided for this image

segment wood time series

In [7]:
XTM_data_path = r"C:\Zwischenlager\wood_time_slices\00_raw"
training_path = r"C:\Zwischenlager\wood_time_slices\training_data"


time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[0]
images_first = os.listdir(os.path.join(XTM_data_path, timestep_folder))
In [55]:
sigmas = [0, 2,4]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }
    
In [27]:
feat_select
Out[27]:
{'Gaussian': True,
 'Sobel': True,
 'Hessian': True,
 'Diff of Gaussians': True,
 'maximum': True,
 'minimum': True,
 'median': True}
In [56]:
training_dict = {}

iteratative loop

In [11]:
# randomly suggest slice for training
num_ts = len(time_folder)
num_slices = len(images_first)

ts = np.random.choice(range(num_ts))+1
print('try time step ',ts )
sn = np.random.choice(range(num_slices))+1
print('try slice number ', sn)

slice_name= ''.join(['ts_',str(ts),'_slice_',str(sn)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
if not os.path.exists(waterpath):
    print('create missing training set with ImageJ-script!')
try time step  6
try slice number  87
create missing training set with ImageJ-script!
In [57]:
time_step = 5
slice_number = 54

time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[time_step]
images = os.listdir(os.path.join(XTM_data_path, timestep_folder))
image_name = images[slice_number]
im = io.imread(os.path.join(XTM_data_path, timestep_folder, image_name))

slice_name= ''.join(['ts_',str(time_step),'_slice_',str(slice_number)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
airname = ''.join([slice_name, '_air.tif'])
airpath = os.path.join(training_path, airname)
fibername = ''.join([slice_name, '_fiber.tif'])
fiberpath = os.path.join(training_path, fibername)

air = io.imread(airpath)>0
water = io.imread(waterpath)>0
fiber = io.imread(fiberpath)>0
truth = air+water*2+fiber*4

if slice_name in training_dict.keys():
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name,sigmas,XTM_data_path, training_path, feat_select,  feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name,sigmas, XTM_data_path, training_path, feat_select, truth=truth, training_dict=training_dict)

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')
creating feature stack
C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (38360), equivalent to an approximate bitdepth of 15.2.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,
training and classifying
save slice result, retrain if needed
training dict contains  1 entries, keep track of memory
No description has been provided for this image
In [58]:
### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas, feat_select)
C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (38360), equivalent to an approximate bitdepth of 15.2.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,
In [29]:
len(feat_names)
Out[29]:
55
In [59]:
plt.figure( figsize=(16,9))
plt.stem(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)
plt.ylabel('importance')
C:\Users\fische_r\AppData\Local\Temp\ipykernel_12928\1483009324.py:2: MatplotlibDeprecationWarning: Passing the linefmt parameter positionally is deprecated since Matplotlib 3.5; the parameter will become keyword-only two minor releases later.
  plt.stem(feat_names,clf.feature_importances_,'x')
Out[59]:
Text(0, 0.5, 'importance')
No description has been provided for this image
In [40]:
import robpylib
In [68]:
feat_files = []
for feat in feat_names:
    feat_files.append(feat+'.tif')
In [69]:
robpylib.CommonFunctions.ImportExport.WriteStackNew(r"C:\Zwischenlager\wood_time_slices\pywekastack", feat_files, feat_stack)
In [44]:
feat_names[0]
Out[44]:
'gaussian_0.0'

wood series with training within jupyter

test on single image, for now 8bit version necessary for display

In [7]:
from ipywidgets import Image
from ipywidgets import ColorPicker, IntSlider, link, AppLayout, HBox
from ipycanvas import RoughCanvas, hold_canvas, Canvas, MultiCanvas
In [8]:
import os
from skimage import io
import matplotlib.pyplot as plt
import numpy as np
im8 = io.imread(r"C:\Zwischenlager\wood_time_slices\8bit_test.tif")
im = io.imread(r"C:\Zwischenlager\wood_time_slices\16bit_test.tif")
truthpath =  r"C:\Zwischenlager\wood_time_slices\tst_truth.tif"
plt.imshow(im8)
Out[8]:
<matplotlib.image.AxesImage at 0x13550e06620>
No description has been provided for this image
In [9]:
resultim = np.zeros(im.shape, dtype=np.uint8)
if os.path.exists(truthpath):
    truth = io.imread(truthpath)
    print('existing label set loaded')
else:
    truth = resultim.copy()
slice_name = 'test'
In [10]:
sigmas = [0, 2,4, 8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }
    
training_dict = {}

Training cells

In [14]:
width = im8.shape[1]
height = im8.shape[0]

Mcanvas = MultiCanvas(4, width=width, height=height)
background = Mcanvas[0]
resultdisplay = Mcanvas[2]
truthdisplay = Mcanvas[1]
canvas = Mcanvas[3]
canvas.sync_image_data = True

drawing = False
position = None
shape = []


def on_mouse_down(x, y):
    global drawing
    global position
    global shape

    drawing = True
    position = (x, y)
    shape = [position]


def on_mouse_move(x, y):
    global drawing
    global position
    global shape

    if not drawing:
        return

    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)

        position = (x, y)

    shape.append(position)


def on_mouse_up(x, y):
    global drawing
    global position
    global shape

    drawing = False

    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)
        canvas.fill_polygon(shape)

    shape = []

image_data = np.stack((im8, im8, im8), axis=2)
background.put_image_data(image_data, 0, 0)

# alpha = 0.15
resultdisplay.global_alpha = 0.15
# result_data = np.stack((255*(resultim==0), 255*(resultim==1), 255*(resultim==2)), axis=2)
if np.any(resultim>0):
    result_data = np.stack((255*(resultim==0), 255*(resultim==1), 255*(resultim==2)), axis=2)
else:
    result_data = np.stack((0*resultim, 0*resultim, 0*resultim), axis=2)
resultdisplay.put_image_data(result_data, 0, 0)

# truth_data = np.stack((255*(truth==1), 2555*(truth==2), 2555*(truth==4)), axis=2)
# truthdisplay.put_image_data(truth_data, 0, 0) 
# truthdisplay.global_alpha = 0.05

canvas.on_mouse_down(on_mouse_down)
canvas.on_mouse_move(on_mouse_move)
canvas.on_mouse_up(on_mouse_up)

# canvas.stroke_style = "#749cb8"

# canvas.global_alpha = 0.75

picker = ColorPicker(description="Color:", value="#ff0000")
slidealpha = IntSlider(description="Result overlay", value=0.15)

link((picker, "value"), (canvas, "stroke_style"))
link((picker, "value"), (canvas, "fill_style"))
# link((slidealpha, "value"), (resultdisplay, "global_alpha"))

HBox((Mcanvas, picker, slidealpha))
#print('paint image with #ff0000 for air, #00ff00 for water and #0000ff for fiber')
HBox(children=(MultiCanvas(height=690, width=744), ColorPicker(value='#ff0000', description='Color:'), IntSlid…
In [12]:
#create truth image from image, save to file
label_set = canvas.get_image_data()

truth[label_set[:,:,0]>0] = 1
truth[label_set[:,:,1]>0] = 2
truth[label_set[:,:,2]>0] = 4

imageio.imsave(truthpath, truth)
In [13]:
if slice_name in training_dict.keys():
    X,y, feat_stack, clf, resultim = slicewise_classify_for_training(im, slice_name,sigmas, feat_select,  feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf, resultim = slicewise_classify_for_training(im, slice_name,sigmas, feat_select, truth=truth, training_dict=training_dict) #XTM_data_path, training_path, 

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')
creating feature stack
C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (40107), equivalent to an approximate bitdepth of 15.3.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,
training and classifying
training dict contains  1 entries, keep track of memory
No description has been provided for this image
In [61]:
### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas, feat_select)
C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (40107), equivalent to an approximate bitdepth of 15.3.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,
In [84]:
plt.figure( figsize=(16,9))
plt.stem(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)
plt.ylabel('importance')
C:\Users\fische_r\AppData\Local\Temp\ipykernel_13852\1483009324.py:2: MatplotlibDeprecationWarning: Passing the linefmt parameter positionally is deprecated since Matplotlib 3.5; the parameter will become keyword-only two minor releases later.
  plt.stem(feat_names,clf.feature_importances_,'x')
Out[84]:
Text(0, 0.5, 'importance')
No description has been provided for this image

4D Filters

In [3]:
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import filters
from skimage import feature
from skimage.morphology import disk,ball
# from sklearn.ensemble import RandomForestClassifier
from scipy import ndimage
import os
import imageio
import sys
In [4]:
import dask
import dask.array
# import cupy as cp
# import cucim
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations_with_replacement
import xarray as xr
In [5]:
array_4D = None
gauss_4D = None
AS = 200
# array_4D = cp.random.random((AS,AS,AS,AS))
array_4Dnp = np.random.random((AS,AS,AS,AS))
%timeit -n 2 gauss_4D = cucim.skimage.filters.gaussian(array_4D, sigma = 2) gauss_4D = Nonedef custom_GPU_Gaussian(array, sigma=1): arraycp = cp.array(array) arraycp = cucim.skimage.filters.gaussian(arraycp, sigma=sigma) array = cp.asnumpy(arraycp) return array

RAM limited GPU accelaration outperformed by massive CPU parallelization

--> do dask parallelization on cpu chunking array in a way to use as many cores as possible
can entire 5D-feature stack fit into memory? data streaming necessary at one point
features

  1. Gaussian
  2. Minimum
  3. Maximum
  4. (Median)
  5. (Hessian)
  6. (Sobel)

what about hessian matrix --> use elements of nd-H-matrix and n eigenvalue?!
Sobel as edge filter in 2D like hessian or other edge extraction filter?

In [7]:
# shows order of hessian elements
axes = range(array_4Dnp.ndim)
for ax0, ax1 in combinations_with_replacement(axes, 2):
    print(ax0, ax1)
0 0
0 1
0 2
0 3
1 1
1 2
1 3
2 2
2 3
3 3
In [8]:
# functions take chunked dask-array as input
def nd_gaussian(da, sig = 0):
    if np.abs(sig-0)<0.1:
        G = np.array(da)
    else:
        G = da.map_overlap(filters.gaussian, depth=4*sig+1, sigma = sig).compute()
    fullname = ''.join(['gaussian_',f'{sig:.1f}'])
    return G, fullname

#TODO create a class that makes the feature stacks
def nd_gaussian_stack(da, sigmas):
    fullnames = []
    gstack = np.zeros(list(da.shape) + [len(sigmas)])
    for sig,i in zip(sigmas, range(len(sigmas))):
        gstack[...,i], name = nd_gaussian(da, sig)
        fullnames.append(name)
    return gstack, fullnames
In [9]:
def nd_diff_of_gaussian(gstack, sigmas):
#     #creates a stack of {size} (see below)
    n = len(sigmas)
    size = int(n*(n-1)/2)
    dstack = np.zeros(list(da.shape) + [size])
    fullnames = []
    cc = 0
    for i in range(1,n):
        for j in range(i):
            dstack[...,cc] = gstack[...,i] - gstack[...,j]
            name = ''.join(['diff_of_gauss_',f'{sigmas[i]:.1f}','_',f'{sigmas[j]:.1f}'])
            fullnames.append(name)
            cc = cc + 1
    return dstack, fullnames
In [10]:
def ball_4d(sig):
    bnd = np.zeros((sig*2+1,sig*2+1,sig*2+1,sig*2+1), dtype = bool)
    bnd[sig,sig,sig,sig] = True
    ecd = ndimage.distance_transform_edt(~bnd)
    bnd = (ecd<sig+0.01).astype(int)
    return bnd
In [11]:
def nd_rank_like_filter(da, sigma, option):
    """
     input
     da - chunked das array up to 4D
     sigma - kernel size, scalar
     option, str ('minimum', 'maximum', 'median')
    """
    if da.ndim == 2:
        fp = disk(sigma)
    if da.ndim == 3:
        fp = ball(sigma)
    if da.ndim == 4:
        fp = ball_4d(sigma)
        
    if option == 'minimum':
        fun = ndimage.minimum_filter
    elif option == 'maximum':
        fun = ndimage.maximum_filter
    elif option == 'median':
        fun = ndimage.median_filter
    else:
        print(option+' not available')
    M = da.map_overlap(fun, depth=sigma+1, footprint=fp).compute()
    fullname = ''.join([option,'_',f'{sigma:.1f}'])
    return M, fullname

def nd_rank_like_stack(da, sigmas, option):
    fullnames = []
    mstack = np.zeros(list(da.shape) + [len(sigmas)-1])
    for sig,i in zip(sigmas[1:], range(len(sigmas)-1)):
        mstack[...,i], name = nd_rank_like_filter(da, sig, option)
        fullnames.append(name)
    return mstack, fullnames   
    
In [12]:
def nd_Hessian_matrix(G):
    """
    copied from skimage.feature.hessian_matrix
    just directly using Gaussian fitered arrays and dask
    """
    
    daG = dask.array.from_array(G)
    gradients = dask.array.gradient(daG)
    axes = range(G.ndim)
    H_elems = [dask.array.gradient(gradients[ax0], axis=ax1).compute() for ax0, ax1 in combinations_with_replacement(axes, 2)]
    elems = [(ax0,ax1) for ax0, ax1 in combinations_with_replacement(axes, 2)]
    return H_elems, elems

def nd_Hessian_stack(G, sigma):
    H_elems, elems = nd_Hessian_matrix(G)
    hstack = np.zeros(list(G.shape)+[len(elems)])
    
    #TODO: this is slow, find some better numpy function
    for i in range(len(elems)):
        hstack[...,i] = H_elems[i]
    
    # print('got Hessian matrices, now doing the eigs')
    # eigs = feature.hessian_matrix_eigvals(H_elems) 
 # for now ignore the eigenvalues (too computationally expensive and H_elems already contains the image curvature  

    fullnames = []
    for i,j in elems:
        fullname = ''.join(['hessian_',str(i),str(j),'_',f'{sigma:.1f}'])
        fullnames.append(fullname) 
        
    return hstack, fullnames

def nd_Hessian_stacks(gstack, sigmas):
    flag = True
    fullnames = []
    for (i, sigma) in zip(range(gstack.shape[-1]), sigmas):
        a, b = nd_Hessian_stack(gstack[...,i], sigma)
        asize = a.shape[-1]
        if flag:
            flag = False
            hstacks = np.zeros(list(gstack[...,-1].shape)+[len(sigmas)*asize])
        hstacks[...,i*asize:i*asize+asize] = a
        fullnames = fullnames + b
    return hstacks, fullnames
In [13]:
def nd_feature_Stack(da, sigmas, feat_select):
#     TODO: make more elegant
    
    fstack = []
    featnames = []
    
    print('apply Gaussian filters anyway')
    gstack, gnames = nd_gaussian_stack(da, sigmas)
    
    if feat_select['Gaussian']:
        featnames = featnames + gnames
        fstack.append(gstack)
        
    if feat_select['Hessian']:
        print('get Hessian matrices')
        hstack, hnames = nd_Hessian_stacks(gstack, sigmas)
        featnames = featnames + hnames 
        fstack.append(hstack)
        
    if feat_select['Diff of Gaussians']:
        print('get differences of Gaussians')
        dstack, dnames = nd_diff_of_gaussian(gstack, sigmas)
        featnames = featnames + dnames
        fstack.append(dstack)
        
    if feat_select['maximum']:
        print('apply maximum filters')
        maxstack, maxnames = nd_rank_like_stack(da, sigmas, option='maximum')
        featnames = featnames + maxnames
        fstack.append(maxstack)
        
    if feat_select['median']:
        print('apply median filters')
        medstack, mednames = nd_rank_like_stack(da, sigmas, option='median')
        featnames = featnames + mednames
        fstack.append(medstack)
        
    if feat_select['minimum']:
        print('apply minimum filters')
        minstack, minnames = nd_rank_like_stack(da, sigmas, option='minimum')
        featnames = featnames + minnames
        fstack.append(minstack)
        
    return np.concatenate(fstack, axis=-1), featnames
In [26]:
def feat_stack_to_nc(fstack, featnames, path = None):
    #TODO: include metadata and
    data = xr.Dataset({'feature_stack': (['x','y','z','time', 'feature'], fstack)},
                       coords = {'x': np.arange(fstack.shape[0]),
                                 'y': np.arange(fstack.shape[1]),
                                 'z': np.arange(fstack.shape[2]),
                                 'time': np.arange(fstack.shape[3]),
                                 'feature': featnames},
                      attrs = {'name': 'test'})
    if path is not None:
        data.to_netcdf(path)
    return data
    
In [15]:
sigmas = [0, 2,4, 8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True, 
               # 'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }
    
training_dict = {}
In [22]:
AS = 75
# array_4D = cp.random.random((AS,AS,AS,AS))
array_4Dnp = np.random.random((AS,AS,AS,AS))
In [23]:
da = dask.array.from_array(array_4Dnp, chunks = '100 MiB')
In [24]:
da
Out[24]:
Array Chunk
Bytes 241.40 MiB 98.88 MiB
Shape (75, 75, 75, 75) (60, 60, 60, 60)
Count 16 Tasks 16 Chunks
Type float64 numpy.ndarray
75 1 75 75 75
In [25]:
fstack, featnames = nd_feature_Stack(da, sigmas, feat_select)
apply Gaussian filters anyway
get Hessian matrices
get differences of Gaussians
apply maximum filters
apply median filters
IOStream.flush timed out
IOStream.flush timed out
apply minimum filters
In [1]:
path = '/home/fische_r/NAS/testing/test_data.nc'
In [30]:
1000/128
Out[30]:
7.8125