pyTrainSeg/test.ipynb at ead3eba3aae074ebee1cd19fb2a7dd7f869ff443

LEC-5422-Public/pyTrainSeg

Fork 0

Files

T

Fischer Robert da84c5924a full workflow to be tested

2022-08-05 13:43:51 +02:00

1.5 MiB

Raw Blame History

Test with real image data and multiple filters¶

In [1]:

from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import filters
from skimage import feature
from skimage.morphology import disk,ball
from sklearn.ensemble import RandomForestClassifier
from scipy import ndimage
import os
import imageio
import sys

In [2]:

im = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_im.tif")
plt.imshow(im)
# "U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif"

Out[2]:

<matplotlib.image.AxesImage at 0x21a7fc3b1f0>

No description has been provided for this image

load label images iteratively optimized in trainable weka segmentation¶

In [3]:

air = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\air_truth.tif")>0
water = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif")>0
fiber = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\fiber_truth.tif")>0
truth = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_truth.tif")

develop feature Stack creation as in trainable weka segmentaion (TWS)¶

TWS creates Gaussian Blurs for sigma=0,1,2,4,8,16,... (whatever the limit is)¶

on each sigma, a sobel and hessian filter is applied¶

for hessian one image each for hessian, hessian trace, hessian determinant, hessian eigenvalue 1 & 2, hessian orientation, hessian square eigenvalue difference, normalized eigenvalue difference¶

difference of gaussians for all: larger sigma - smaller¶

Filters in TWS¶

In [155]:

def TWS_gaussian(im, sig=0):
    G = filters.gaussian(im, sigma=sig, mode='reflect') #, preserve_range=True
    fullname = ''.join(['gaussian_',f'{sig:.1f}'])
    return G, fullname

def TWS_gaussian_stack(im, sigmas):
    fullnames = []
    gstack = np.zeros((im.shape[0],im.shape[1], len(sigmas)))
    for sig,i in zip(sigmas, range(len(sigmas))):
        # if np.abs(sig-0)<0.1:
        #     gstack[:,:,i] = im
        #     name = ''.join(['gaussian_',f'{0:.1f}'])
        # else:
        gstack[:,:,i], name = TWS_gaussian(im, sig)
        fullnames.append(name)
    return gstack, fullnames

def TWS_sobel(im, sig):
    #sigma is only passed to the name! make sure it's correct
    S = filters.sobel(im, mode='reflect')
    name = ''.join(['sobel_',f'{sig:.1f}'])
    return S, name

def TWS_sobel_stack(gstack, sigmas):
    Sstack = np.zeros(gstack.shape)
    fullnames = []
    for i in range(len(sigmas)):
        Sstack[:,:,i], name = TWS_sobel(gstack[:,:,i], sigmas[i])
        fullnames.append(name)
    return Sstack, fullnames

def TWS_hessian(im, sig):
    #creates 8 images per sigma
    #sigma is only passed to the name! make sure it's correct
    a, b, d =  feature.hessian_matrix(im, mode='reflect')
    c = b
    
    mod = np.sqrt(a**2+b*c+d**2)
    trace = a+d
    det = a*d-c*b
    eig1 = (a+d)/2 + np.sqrt((4*b**2+(a-d)**2)/2)
    eig2 = (a+d)/2 - np.sqrt((4*b**2+(a-d)**2)/2)
    
    gamma_norm_eig_diff = (a-d)**2*((a-d)**2+4*b**2)
    square_norm_eig_diff = ((a-d)**2+4*b**2) 
    
    orient = 0.5*np.arccos(4*b**2+(a-d)**2)
    hessian_stack = np.dstack([mod,trace,det,eig1,eig2,orient,gamma_norm_eig_diff,square_norm_eig_diff])
    names = ['module', 'trace', 'determinant', 'eigenvalue1', 'eigenvalue2', 'orientation', 'gamma_norm_eig_diff', 'square_norm_eig_diff']

    fullnames = []
    for name in names:
        fullname = ''.join(['hessian_',name,'_',f'{sig:.1f}'])
        fullnames.append(fullname)
    return hessian_stack, fullnames

def TWS_hessian_stack(gstack, sigmas):
    size = len(sigmas)*8
    Hstack = np.zeros((gstack.shape[0],gstack.shape[1], size))
    fullnames = []
    for i in range(len(sigmas)):
        Hstack[:,:,i*8:i*8+8], names = TWS_hessian(gstack[:,:,i],sigmas[i])
        fullnames = fullnames + names
    return Hstack, fullnames
        

def TWS_diff_of_gaussians(gstack, sigmas):
    #creates a stack of {size} (see below)
    n = len(sigmas)
    size = int(n*(n-1)/2)
    
    diff_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    cc = 0
    for i in range(1,n):
        for j in range(i):
            DG = gstack[:,:,i]-gstack[:,:,j]
            diff_stack[:,:,cc] = DG
            name = ''.join(['diff_of_gauss_',f'{sigmas[i]:.1f}','_',f'{sigmas[j]:.1f}'])
            fullnames.append(name)
            cc = cc + 1
    return diff_stack, fullnames
            
            
def TWS_minimum(im, sigma):
    M = filters.rank.minimum(im, disk(sigma))
    fullname = ''.join(['minimum_',f'{sigma:.1f}'])
    return M, fullname

def TWS_minimum_stack(im, sigmas):
    size = len(sigmas)-1
    min_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        min_stack[:,:,i], fullname = TWS_minimum(im, sig)
        fullnames.append(fullname)
    return min_stack, fullnames

def TWS_maximum(im, sigma):
    M = filters.rank.maximum(im, disk(sigma))
    fullname = ''.join(['maximum_',f'{sigma:.1f}'])
    return M, fullname

def TWS_maximum_stack(im, sigmas):
    size = len(sigmas)-1
    max_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        max_stack[:,:,i], fullname = TWS_maximum(im, sig)
        fullnames.append(fullname)
    return max_stack, fullnames

def TWS_median(im, sigma):
    M = filters.rank.median(im, disk(sigma))
    fullname = ''.join(['median_',f'{sigma:.1f}'])
    return M, fullname

def TWS_median_stack(im, sigmas):
    size = len(sigmas)-1
    med_stack = np.zeros((im.shape[0], im.shape[1], size))
    fullnames = []
    i = 0
    for i in range(size):
        sig = sigmas[i+1]
        med_stack[:,:,i], fullname = TWS_median(im, sig)
        fullnames.append(fullname)
    return med_stack, fullnames

reverse engineered feature stack¶

In [3]:

def TWS_feature_stack(im, sigmas, feat_select):
        feat_names = []
        stack_list = []
    #TODO: allow ticking off features
    
        #gaussian filters
        if feat_select['Gaussian']:
            g_stack, gfeat = TWS_gaussian_stack(im, sigmas)
            stack_list.append(g_stack)
            feat_names = feat_names + gfeat
            
        #sobel filter on every gaussian sigma
        if feat_select['Sobel']:
            s_stack, sfeat = TWS_sobel_stack(g_stack, sigmas)
            stack_list.append(s_stack)
            feat_names = feat_names + sfeat
            
        #stack of hessian stacks for every sigma
        if feat_select['Hessian']:
            h_stack, hfeat = TWS_hessian_stack(g_stack, sigmas)
            stack_list.append(h_stack)
            feat_names = feat_names + hfeat
            
        #diff of gaussians
        if feat_select['Diff of Gaussians']:
            d_stack, dfeat = TWS_diff_of_gaussians(g_stack, sigmas)
            stack_list.append(d_stack)
            feat_names = feat_names + dfeat
        
        #minimum filters
        if feat_select['minimum']:
            min_stack, minfeat = TWS_minimum_stack(im, sigmas)
            stack_list.append(min_stack)
            feat_names = feat_names + minfeat

        #maximum filters
        if feat_select['maximum']:
            max_stack, maxfeat = TWS_maximum_stack(im, sigmas)
            stack_list.append(max_stack)
            feat_names = feat_names + maxfeat
        
        #median filters
        if feat_select['median']:
            med_stack, medfeat = TWS_median_stack(im, sigmas)
            stack_list.append(med_stack)
            feat_names = feat_names + medfeat
            
        feat_stack = np.dstack(stack_list)
                        
        return feat_stack, feat_names

Function to classify one slice automatically detecting phases in truth image¶

In [4]:

def label_data_slice(im, truth, sigmas, feat_select, feat_stack=None):
    #TODO: automatically detect phases in truth image and aovid overlap
    #TODO: define format of truth image
    phase1 = truth==1
    phase2 = truth==2
    phase3 = truth==4
    
    if feat_stack is None:
        feat_stack, feat_names = TWS_feature_stack(im, sigmas, feat_select)
    
    X1 = feat_stack[phase1]
    y1 = np.zeros(X1.shape[0])
    X2 = feat_stack[phase2]
    y2 = np.ones(X2.shape[0])
    X3 = feat_stack[phase3]
    y3 = 2*np.ones(X3.shape[0])

    y = np.concatenate([y1,y2,y3])
    X = np.concatenate([X1,X2,X3])
    
    return X,y, feat_stack

In [5]:

 def classify_and_plot(X,y,im, feat_stack, plot=True):
    # TODO: allow choice and manipulation of ML method
    clf =  RandomForestClassifier(n_estimators = 300, n_jobs=-1, random_state = 42, max_features=None) 
    clf.fit(X, y)
    num_feat = feat_stack.shape[2]
    ypred = clf.predict(feat_stack.reshape(-1,num_feat))
    result = ypred.reshape(im.shape).astype(np.uint8)
    if plot:
        fig, (ax1, ax2)= plt.subplots(1,2,figsize=(12,7))
        ax1.imshow(im, cmap='Greys_r')
        ax2.imshow(result)
    return result, clf

In [6]:

def slicewise_classify_for_training(im, slice_name,sigmas, feat_select, plot=True,  feat_stack=None, truth=None, training_dict=None): #, training_path, XTM_data_path
    #consider training data from other slices but do not simpliy append to avoid duplicates
    flag = False #TODO: get rid of flag
    if training_dict is not None:
        slices = list(training_dict.keys())
        if slice_name in slices: 
            slices.remove(slice_name)
        if len(slices)>0:
            flag = True
            Xall = training_dict[slices[0]][0]
            yall = training_dict[slices[0]][1]
            for i in range(1,len(slices)):
                Xall = np.concatenate([Xall, training_dict[slices[i]][0]])
                yall = np.concatenate([yall, training_dict[slices[i]][1]])
    
    
    if feat_stack is None:
        print('creating feature stack')
        X,y, feat_stack =  label_data_slice(im, truth, sigmas, feat_select)
    else:
        X,y, feat_stack =  label_data_slice(im, truth, sigmas, feat_select, feat_stack=feat_stack) 
    
    print('training and classifying')
    
    if training_dict is not None and flag:
        Xt = np.concatenate([Xall,X])
        yt = np.concatenate([yall,y])
        Xall = None
        yall = None
    else:
        Xt = X
        yt = y
    
    result, clf = classify_and_plot(Xt,yt,im, feat_stack, plot)
    
    # print('save slice result, retrain if needed')
    # imageio.imsave(os.path.join(training_path,''.join([slice_name,'_classified.tif'])), result)
    return X, y, feat_stack, clf, result

end of definitions¶

train classifier and plot result on "test" slice¶

In [9]:

sigmas = [0, 2,4,6,8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True
              }

In [10]:

slice_name = 'test'
training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat"

In [10]:

im = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\test_im.tif")
air = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\air_truth.tif")>0
water = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\water_truth.tif")>0
fiber = io.imread(r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\fiber_truth.tif")>0
truth = air+water*2+fiber*4

In [12]:

X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, truth=truth,training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat", XTM_data_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat")

creating feature stack
training and classifying
save slice result

In [13]:

training_dict = {}
training_dict[slice_name] = (X,y, feat_stack)

proper training workflow for 3 phases air, water, fiber TODO: make general¶

In [14]:

XTM_data_path = r"D:\TOMCAT_2\01_intcorrect_med_leg_0"
training_path = r"U:\01_Python\00_playground\test_pytorch\Dataset\test_tomcat\training"


time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[0]
images_first = os.listdir(os.path.join(XTM_data_path, timestep_folder))

In [37]:

# randomly suggest slice for training
num_ts = len(time_folder)
num_slices = len(images_first)

ts = np.random.choice(range(num_ts))+1
print('try time step ',ts )
sn = np.random.choice(range(num_slices))+1
print('try slice number ', sn)

slice_name= ''.join(['ts_',str(ts),'_slice_',str(sn)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
if not os.path.exists(waterpath):
    print('create missing training set with ImageJ-script!')

try time step  9
try slice number  326
create missing training set with ImageJ-script!

In [27]:

time_step = 15
slice_number = 519

time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[time_step]
images = os.listdir(os.path.join(XTM_data_path, timestep_folder))
image_name = images[slice_number]
im = io.imread(os.path.join(XTM_data_path, timestep_folder, image_name))

slice_name= ''.join(['ts_',str(time_step),'_slice_',str(slice_number)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
airname = ''.join([slice_name, '_air.tif'])
airpath = os.path.join(training_path, airname)
fibername = ''.join([slice_name, '_fiber.tif'])
fiberpath = os.path.join(training_path, fibername)

air = io.imread(airpath)>0
water = io.imread(waterpath)>0
fiber = io.imread(fiberpath)>0
truth = air+water*2+fiber*4

if slice_name in training_dict.keys():
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, XTM_data_path=XTM_data_path, training_path=training_path, feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name, XTM_data_path=XTM_data_path, training_path=training_path, truth=truth, training_dict=training_dict)

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')

training and classifying
save slice result

In [31]:

### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas)

In [32]:

plt.figure( figsize=(16,9))
plt.plot(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)

Out[32]:

([0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59],
 [Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, ''),
  Text(0, 0, '')])

segment wood time series¶

In [7]:

XTM_data_path = r"C:\Zwischenlager\wood_time_slices\00_raw"
training_path = r"C:\Zwischenlager\wood_time_slices\training_data"


time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[0]
images_first = os.listdir(os.path.join(XTM_data_path, timestep_folder))

In [55]:

sigmas = [0, 2,4]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }

In [27]:

feat_select

Out[27]:

{'Gaussian': True,
 'Sobel': True,
 'Hessian': True,
 'Diff of Gaussians': True,
 'maximum': True,
 'minimum': True,
 'median': True}

In [56]:

training_dict = {}

iteratative loop¶

In [11]:

# randomly suggest slice for training
num_ts = len(time_folder)
num_slices = len(images_first)

ts = np.random.choice(range(num_ts))+1
print('try time step ',ts )
sn = np.random.choice(range(num_slices))+1
print('try slice number ', sn)

slice_name= ''.join(['ts_',str(ts),'_slice_',str(sn)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
if not os.path.exists(waterpath):
    print('create missing training set with ImageJ-script!')

try time step  6
try slice number  87
create missing training set with ImageJ-script!

In [57]:

time_step = 5
slice_number = 54

time_folder = os.listdir(XTM_data_path)
timestep_folder = time_folder[time_step]
images = os.listdir(os.path.join(XTM_data_path, timestep_folder))
image_name = images[slice_number]
im = io.imread(os.path.join(XTM_data_path, timestep_folder, image_name))

slice_name= ''.join(['ts_',str(time_step),'_slice_',str(slice_number)])
watername = ''.join([slice_name, '_water.tif'])
waterpath = os.path.join(training_path, watername)
airname = ''.join([slice_name, '_air.tif'])
airpath = os.path.join(training_path, airname)
fibername = ''.join([slice_name, '_fiber.tif'])
fiberpath = os.path.join(training_path, fibername)

air = io.imread(airpath)>0
water = io.imread(waterpath)>0
fiber = io.imread(fiberpath)>0
truth = air+water*2+fiber*4

if slice_name in training_dict.keys():
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name,sigmas,XTM_data_path, training_path, feat_select,  feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf = slicewise_classify_for_training(im, slice_name,sigmas, XTM_data_path, training_path, feat_select, truth=truth, training_dict=training_dict)

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')

creating feature stack

C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (38360), equivalent to an approximate bitdepth of 15.2.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,

training and classifying
save slice result, retrain if needed
training dict contains  1 entries, keep track of memory

In [58]:

### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas, feat_select)

C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (38360), equivalent to an approximate bitdepth of 15.2.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,

In [29]:

len(feat_names)

Out[29]:

In [59]:

plt.figure( figsize=(16,9))
plt.stem(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)
plt.ylabel('importance')

C:\Users\fische_r\AppData\Local\Temp\ipykernel_12928\1483009324.py:2: MatplotlibDeprecationWarning: Passing the linefmt parameter positionally is deprecated since Matplotlib 3.5; the parameter will become keyword-only two minor releases later.
  plt.stem(feat_names,clf.feature_importances_,'x')

Out[59]:

Text(0, 0.5, 'importance')

In [40]:

import robpylib

In [68]:

feat_files = []
for feat in feat_names:
    feat_files.append(feat+'.tif')

In [69]:

robpylib.CommonFunctions.ImportExport.WriteStackNew(r"C:\Zwischenlager\wood_time_slices\pywekastack", feat_files, feat_stack)

In [44]:

feat_names[0]

Out[44]:

'gaussian_0.0'

wood series with training within jupyter¶

test on single image, for now 8bit version necessary for display¶

In [7]:

from ipywidgets import Image
from ipywidgets import ColorPicker, IntSlider, link, AppLayout, HBox
from ipycanvas import RoughCanvas, hold_canvas, Canvas, MultiCanvas

In [8]:

import os
from skimage import io
import matplotlib.pyplot as plt
import numpy as np
im8 = io.imread(r"C:\Zwischenlager\wood_time_slices\8bit_test.tif")
im = io.imread(r"C:\Zwischenlager\wood_time_slices\16bit_test.tif")
truthpath =  r"C:\Zwischenlager\wood_time_slices\tst_truth.tif"
plt.imshow(im8)

Out[8]:

<matplotlib.image.AxesImage at 0x13550e06620>

In [9]:

resultim = np.zeros(im.shape, dtype=np.uint8)
if os.path.exists(truthpath):
    truth = io.imread(truthpath)
    print('existing label set loaded')
else:
    truth = resultim.copy()
slice_name = 'test'

In [10]:

sigmas = [0, 2,4, 8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True,
               'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }
    
training_dict = {}

Training cells¶

In [14]:

width = im8.shape[1]
height = im8.shape[0]

Mcanvas = MultiCanvas(4, width=width, height=height)
background = Mcanvas[0]
resultdisplay = Mcanvas[2]
truthdisplay = Mcanvas[1]
canvas = Mcanvas[3]
canvas.sync_image_data = True

drawing = False
position = None
shape = []


def on_mouse_down(x, y):
    global drawing
    global position
    global shape

    drawing = True
    position = (x, y)
    shape = [position]


def on_mouse_move(x, y):
    global drawing
    global position
    global shape

    if not drawing:
        return

    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)

        position = (x, y)

    shape.append(position)


def on_mouse_up(x, y):
    global drawing
    global position
    global shape

    drawing = False

    with hold_canvas():
        canvas.stroke_line(position[0], position[1], x, y)
        canvas.fill_polygon(shape)

    shape = []

image_data = np.stack((im8, im8, im8), axis=2)
background.put_image_data(image_data, 0, 0)

# alpha = 0.15
resultdisplay.global_alpha = 0.15
# result_data = np.stack((255*(resultim==0), 255*(resultim==1), 255*(resultim==2)), axis=2)
if np.any(resultim>0):
    result_data = np.stack((255*(resultim==0), 255*(resultim==1), 255*(resultim==2)), axis=2)
else:
    result_data = np.stack((0*resultim, 0*resultim, 0*resultim), axis=2)
resultdisplay.put_image_data(result_data, 0, 0)

# truth_data = np.stack((255*(truth==1), 2555*(truth==2), 2555*(truth==4)), axis=2)
# truthdisplay.put_image_data(truth_data, 0, 0) 
# truthdisplay.global_alpha = 0.05

canvas.on_mouse_down(on_mouse_down)
canvas.on_mouse_move(on_mouse_move)
canvas.on_mouse_up(on_mouse_up)

# canvas.stroke_style = "#749cb8"

# canvas.global_alpha = 0.75

picker = ColorPicker(description="Color:", value="#ff0000")
slidealpha = IntSlider(description="Result overlay", value=0.15)

link((picker, "value"), (canvas, "stroke_style"))
link((picker, "value"), (canvas, "fill_style"))
# link((slidealpha, "value"), (resultdisplay, "global_alpha"))

HBox((Mcanvas, picker, slidealpha))
#print('paint image with #ff0000 for air, #00ff00 for water and #0000ff for fiber')

HBox(children=(MultiCanvas(height=690, width=744), ColorPicker(value='#ff0000', description='Color:'), IntSlid…

In [12]:

#create truth image from image, save to file
label_set = canvas.get_image_data()

truth[label_set[:,:,0]>0] = 1
truth[label_set[:,:,1]>0] = 2
truth[label_set[:,:,2]>0] = 4

imageio.imsave(truthpath, truth)

In [13]:

if slice_name in training_dict.keys():
    X,y, feat_stack, clf, resultim = slicewise_classify_for_training(im, slice_name,sigmas, feat_select,  feat_stack=training_dict[slice_name][2], truth=truth, training_dict=training_dict)
else:
    X,y, feat_stack, clf, resultim = slicewise_classify_for_training(im, slice_name,sigmas, feat_select, truth=truth, training_dict=training_dict) #XTM_data_path, training_path, 

training_dict[slice_name] = (X,y, feat_stack)
print('training dict contains ',len(training_dict.keys()),'entries, keep track of memory')

creating feature stack

C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (40107), equivalent to an approximate bitdepth of 15.3.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,

training and classifying
training dict contains  1 entries, keep track of memory

In [61]:

### make test feature_stack and names
_, feat_names = TWS_feature_stack(im, sigmas, feat_select)

C:\Users\fische_r\Miniconda3\envs\pyweka\lib\site-packages\skimage\filters\rank\generic.py:262: UserWarning: Bad rank filter performance is expected due to a large number of bins (40107), equivalent to an approximate bitdepth of 15.3.
  image, footprint, out, mask, n_bins = _preprocess_input(image, footprint,

In [84]:

plt.figure( figsize=(16,9))
plt.stem(feat_names,clf.feature_importances_,'x')
plt.xticks(rotation=90)
plt.ylabel('importance')

C:\Users\fische_r\AppData\Local\Temp\ipykernel_13852\1483009324.py:2: MatplotlibDeprecationWarning: Passing the linefmt parameter positionally is deprecated since Matplotlib 3.5; the parameter will become keyword-only two minor releases later.
  plt.stem(feat_names,clf.feature_importances_,'x')

Out[84]:

Text(0, 0.5, 'importance')

4D Filters¶

In [3]:

from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from skimage import filters
from skimage import feature
from skimage.morphology import disk,ball
# from sklearn.ensemble import RandomForestClassifier
from scipy import ndimage
import os
import imageio
import sys

In [4]:

import dask
import dask.array
# import cupy as cp
# import cucim
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations_with_replacement
import xarray as xr

In [5]:

array_4D = None
gauss_4D = None
AS = 200
# array_4D = cp.random.random((AS,AS,AS,AS))
array_4Dnp = np.random.random((AS,AS,AS,AS))

%timeit -n 2 gauss_4D = cucim.skimage.filters.gaussian(array_4D, sigma = 2) gauss_4D = Nonedef custom_GPU_Gaussian(array, sigma=1): arraycp = cp.array(array) arraycp = cucim.skimage.filters.gaussian(arraycp, sigma=sigma) array = cp.asnumpy(arraycp) return array

RAM limited GPU accelaration outperformed by massive CPU parallelization¶

--> do dask parallelization on cpu chunking array in a way to use as many cores as possible
can entire 5D-feature stack fit into memory? data streaming necessary at one point
features

Gaussian
Minimum
Maximum
(Median)
(Hessian)
(Sobel)

what about hessian matrix --> use elements of nd-H-matrix and n eigenvalue?!
Sobel as edge filter in 2D like hessian or other edge extraction filter?

In [7]:

# shows order of hessian elements
axes = range(array_4Dnp.ndim)
for ax0, ax1 in combinations_with_replacement(axes, 2):
    print(ax0, ax1)

In [8]:

# functions take chunked dask-array as input
def nd_gaussian(da, sig = 0):
    if np.abs(sig-0)<0.1:
        G = np.array(da)
    else:
        G = da.map_overlap(filters.gaussian, depth=4*sig+1, sigma = sig).compute()
    fullname = ''.join(['gaussian_',f'{sig:.1f}'])
    return G, fullname

#TODO create a class that makes the feature stacks
def nd_gaussian_stack(da, sigmas):
    fullnames = []
    gstack = np.zeros(list(da.shape) + [len(sigmas)])
    for sig,i in zip(sigmas, range(len(sigmas))):
        gstack[...,i], name = nd_gaussian(da, sig)
        fullnames.append(name)
    return gstack, fullnames

In [9]:

def nd_diff_of_gaussian(gstack, sigmas):
#     #creates a stack of {size} (see below)
    n = len(sigmas)
    size = int(n*(n-1)/2)
    dstack = np.zeros(list(da.shape) + [size])
    fullnames = []
    cc = 0
    for i in range(1,n):
        for j in range(i):
            dstack[...,cc] = gstack[...,i] - gstack[...,j]
            name = ''.join(['diff_of_gauss_',f'{sigmas[i]:.1f}','_',f'{sigmas[j]:.1f}'])
            fullnames.append(name)
            cc = cc + 1
    return dstack, fullnames

In [10]:

def ball_4d(sig):
    bnd = np.zeros((sig*2+1,sig*2+1,sig*2+1,sig*2+1), dtype = bool)
    bnd[sig,sig,sig,sig] = True
    ecd = ndimage.distance_transform_edt(~bnd)
    bnd = (ecd<sig+0.01).astype(int)
    return bnd

In [11]:

def nd_rank_like_filter(da, sigma, option):
    """
     input
     da - chunked das array up to 4D
     sigma - kernel size, scalar
     option, str ('minimum', 'maximum', 'median')
    """
    if da.ndim == 2:
        fp = disk(sigma)
    if da.ndim == 3:
        fp = ball(sigma)
    if da.ndim == 4:
        fp = ball_4d(sigma)
        
    if option == 'minimum':
        fun = ndimage.minimum_filter
    elif option == 'maximum':
        fun = ndimage.maximum_filter
    elif option == 'median':
        fun = ndimage.median_filter
    else:
        print(option+' not available')
    M = da.map_overlap(fun, depth=sigma+1, footprint=fp).compute()
    fullname = ''.join([option,'_',f'{sigma:.1f}'])
    return M, fullname

def nd_rank_like_stack(da, sigmas, option):
    fullnames = []
    mstack = np.zeros(list(da.shape) + [len(sigmas)-1])
    for sig,i in zip(sigmas[1:], range(len(sigmas)-1)):
        mstack[...,i], name = nd_rank_like_filter(da, sig, option)
        fullnames.append(name)
    return mstack, fullnames

In [12]:

def nd_Hessian_matrix(G):
    """
    copied from skimage.feature.hessian_matrix
    just directly using Gaussian fitered arrays and dask
    """
    
    daG = dask.array.from_array(G)
    gradients = dask.array.gradient(daG)
    axes = range(G.ndim)
    H_elems = [dask.array.gradient(gradients[ax0], axis=ax1).compute() for ax0, ax1 in combinations_with_replacement(axes, 2)]
    elems = [(ax0,ax1) for ax0, ax1 in combinations_with_replacement(axes, 2)]
    return H_elems, elems

def nd_Hessian_stack(G, sigma):
    H_elems, elems = nd_Hessian_matrix(G)
    hstack = np.zeros(list(G.shape)+[len(elems)])
    
    #TODO: this is slow, find some better numpy function
    for i in range(len(elems)):
        hstack[...,i] = H_elems[i]
    
    # print('got Hessian matrices, now doing the eigs')
    # eigs = feature.hessian_matrix_eigvals(H_elems) 
 # for now ignore the eigenvalues (too computationally expensive and H_elems already contains the image curvature  

    fullnames = []
    for i,j in elems:
        fullname = ''.join(['hessian_',str(i),str(j),'_',f'{sigma:.1f}'])
        fullnames.append(fullname) 
        
    return hstack, fullnames

def nd_Hessian_stacks(gstack, sigmas):
    flag = True
    fullnames = []
    for (i, sigma) in zip(range(gstack.shape[-1]), sigmas):
        a, b = nd_Hessian_stack(gstack[...,i], sigma)
        asize = a.shape[-1]
        if flag:
            flag = False
            hstacks = np.zeros(list(gstack[...,-1].shape)+[len(sigmas)*asize])
        hstacks[...,i*asize:i*asize+asize] = a
        fullnames = fullnames + b
    return hstacks, fullnames

In [13]:

def nd_feature_Stack(da, sigmas, feat_select):
#     TODO: make more elegant
    
    fstack = []
    featnames = []
    
    print('apply Gaussian filters anyway')
    gstack, gnames = nd_gaussian_stack(da, sigmas)
    
    if feat_select['Gaussian']:
        featnames = featnames + gnames
        fstack.append(gstack)
        
    if feat_select['Hessian']:
        print('get Hessian matrices')
        hstack, hnames = nd_Hessian_stacks(gstack, sigmas)
        featnames = featnames + hnames 
        fstack.append(hstack)
        
    if feat_select['Diff of Gaussians']:
        print('get differences of Gaussians')
        dstack, dnames = nd_diff_of_gaussian(gstack, sigmas)
        featnames = featnames + dnames
        fstack.append(dstack)
        
    if feat_select['maximum']:
        print('apply maximum filters')
        maxstack, maxnames = nd_rank_like_stack(da, sigmas, option='maximum')
        featnames = featnames + maxnames
        fstack.append(maxstack)
        
    if feat_select['median']:
        print('apply median filters')
        medstack, mednames = nd_rank_like_stack(da, sigmas, option='median')
        featnames = featnames + mednames
        fstack.append(medstack)
        
    if feat_select['minimum']:
        print('apply minimum filters')
        minstack, minnames = nd_rank_like_stack(da, sigmas, option='minimum')
        featnames = featnames + minnames
        fstack.append(minstack)
        
    return np.concatenate(fstack, axis=-1), featnames

In [26]:

def feat_stack_to_nc(fstack, featnames, path = None):
    #TODO: include metadata and
    data = xr.Dataset({'feature_stack': (['x','y','z','time', 'feature'], fstack)},
                       coords = {'x': np.arange(fstack.shape[0]),
                                 'y': np.arange(fstack.shape[1]),
                                 'z': np.arange(fstack.shape[2]),
                                 'time': np.arange(fstack.shape[3]),
                                 'feature': featnames},
                      attrs = {'name': 'test'})
    if path is not None:
        data.to_netcdf(path)
    return data

In [15]:

sigmas = [0, 2,4, 8]  #hard-coded for now, sobel and hessian require that first sigma is 0, diff, gaussian(sig=0) = 0

# default feature choice
feat_select = {'Gaussian': True, 
               # 'Sobel': True,
               'Hessian': True,
               'Diff of Gaussians': True,
               'maximum': True,
               'minimum': True,
               'median': True
              }
    
training_dict = {}

In [22]:

AS = 75
# array_4D = cp.random.random((AS,AS,AS,AS))
array_4Dnp = np.random.random((AS,AS,AS,AS))

In [23]:

da = dask.array.from_array(array_4Dnp, chunks = '100 MiB')

In [24]:

da

Out[24]:

	Array	Chunk
Bytes	241.40 MiB	98.88 MiB
Shape	(75, 75, 75, 75)	(60, 60, 60, 60)
Count	16 Tasks	16 Chunks
Type	float64	numpy.ndarray

In [25]:

fstack, featnames = nd_feature_Stack(da, sigmas, feat_select)

apply Gaussian filters anyway
get Hessian matrices
get differences of Gaussians
apply maximum filters
apply median filters

IOStream.flush timed out
IOStream.flush timed out

apply minimum filters

In [1]:

path = '/home/fische_r/NAS/testing/test_data.nc'

In [30]:

1000/128

Out[30]:

7.8125

1.5 MiB Raw Blame History

Test with real image data and multiple filters¶

load label images iteratively optimized in trainable weka segmentation¶

develop feature Stack creation as in trainable weka segmentaion (TWS)¶

TWS creates Gaussian Blurs for sigma=0,1,2,4,8,16,... (whatever the limit is)¶

on each sigma, a sobel and hessian filter is applied¶

for hessian one image each for hessian, hessian trace, hessian determinant, hessian eigenvalue 1 & 2, hessian orientation, hessian square eigenvalue difference, normalized eigenvalue difference¶

difference of gaussians for all: larger sigma - smaller¶

Filters in TWS¶

reverse engineered feature stack¶

Function to classify one slice automatically detecting phases in truth image¶

end of definitions¶

train classifier and plot result on "test" slice¶

proper training workflow for 3 phases air, water, fiber TODO: make general¶

segment wood time series¶

iteratative loop¶

wood series with training within jupyter¶

test on single image, for now 8bit version necessary for display¶

Training cells¶

4D Filters¶

RAM limited GPU accelaration outperformed by massive CPU parallelization¶

1.5 MiB

Raw Blame History