From bb095b692e7ece36961b34e990a1cbf4da994b06 Mon Sep 17 00:00:00 2001
From: Fischer Robert <robert.fischer@psi.ch>
Date: Wed, 26 Jul 2023 14:03:56 +0200
Subject: [PATCH] use dask-iamge for gaussian filter

---
 filter_functions.py | 71 ++++++++++++++++-----------------------------
 1 file changed, 25 insertions(+), 46 deletions(-)

diff --git a/filter_functions.py b/filter_functions.py
index a7079b2..c993160 100644
--- a/filter_functions.py
+++ b/filter_functions.py
@@ -17,7 +17,8 @@ TODO: store git commit sha
 # necessary packages for feature stack
 import numpy as np
 from scipy import ndimage
-from skimage import filters
+# from skimage import filters
+import dask_image.ndfilters
 from skimage.morphology import ball
 
 import dask
@@ -104,65 +105,43 @@ class image_filter:
         self.computed = False
         
     
-    # TODO: currently loads full dataset into memory, consider aligning desired chunk already for original dataset to avoid rechunking
-    # .rechunk() causes problems downstream: "Assertion error" , WTF?!
-    # if original data soe not fit in RAM, rechunk, store to disk and load again?
-    # rechunk apparently sometimes only adds an addtional graph layer linking initial chunks
-    # def open_raw_data(self):
-    #     data = xr.open_dataset(self.data_path)
-    #     da = dask.array.from_array(data.tomo.data, chunks = self.chunks)
-        
-    #     self.original_dataset = data
-    #     self.data = da
-    
-    # def open_lazy_data(self, chunks=None):
-    #     if chunks is None: 
-    #         chunks = self.chunks
-    #     data = xr.open_dataset(self.data_path, chunks = chunks)
-    #     da = dask.array.from_array(data.tomo)
-    #     # print('maybe re-introducing rechunking, but for large datasets auto might be ok')
-    #     # print('smaller chunks might be better for slicewise training')
-    #     # print('currently provided chunks are ignored')
-    #     self.original_dataset = data#.rechunk(self.chunks)
-    #     self.data = da
-    
-    # def load_raw_data(self):
-    #     data = xr.load_dataset(self.data_path)
-    #     # da = dask.array.from_array(data.tomo).rechunk(chunks = self.chunks)
-        
-    #     da = dask.array.from_array(data.tomo.data, chunks = self.chunks)
-
-    #     self.original_dataset = data
-    #     self.data = da 
         
     def Gaussian_Blur_4D(self, sigma):
         # TODO: check on boundary mode
-        deptharray = np.ones(self.data.ndim)+4*sigma
-        deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
-        G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigma)
+        # TODO: test dask-image Gaussian filter and use if it works
+        # deptharray = np.ones(self.data.ndim)+4*sigma
+        # deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
+        # G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigma)
+        G = dask_image.ndfilters.gaussian_filter(self.data, mode='nearest', sigma = sigma)
+                
         self.feature_names.append('Gaussian_4D_Blur_'+f'{sigma:.1f}')
         self.calculated_features.append(G)
         self.Gaussian_4D_dict[f'{sigma:.1f}'] = G
         
     def Gaussian_Blur_space(self, sigma):      
-        deptharray = np.ones(self.data.ndim)+4*sigma
-        deptharray[-1] = 0
-        sigmas = np.ones(deptharray.shape)*sigma
-        deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
-        
+        # deptharray = np.ones(self.data.ndim)+4*sigma
+        # deptharray[-1] = 0
+        # deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
+        sigmas = np.ones(self.data.ndim)*sigma
         sigmas[-1] = 0
-        G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigmas)
+        
+        # G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigmas)
+        G = dask_image.ndfilters.gaussian_filter(self.data, mode='nearest', sigma = sigmas)
+        
         self.feature_names.append('Gaussian_space_'+f'{sigma:.1f}')
         self.calculated_features.append(G)
         self.Gaussian_space_dict[f'{sigma:.1f}'] = G
 
     def Gaussian_Blur_time(self, sigma):      
-        deptharray = np.ones(self.data.ndim)+4*sigma
-        deptharray[:-1] = 0
-        sigmas = np.ones(deptharray.shape)*sigma
-        deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
+        # deptharray = np.ones(self.data.ndim)+4*sigma
+        # deptharray[:-1] = 0
+        # deptharray = tuple(np.min([deptharray, self.data.shape], axis=0))
+        
+        sigmas = np.ones(self.data.ndim)*sigma
         sigmas[:-1] = 0
-        G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigmas)
+        # G = self.data.map_overlap(filters.gaussian, depth=deptharray, boundary='nearest', sigma = sigmas)
+        G = dask_image.ndfilters.gaussian_filter(self.data, mode='nearest', sigma = sigmas)
+        
         self.feature_names.append('Gaussian_time_'+f'{sigma:.1f}')
         self.calculated_features.append(G)
         self.Gaussian_time_dict[f'{sigma:.1f}'] = G
@@ -443,7 +422,7 @@ class image_filter:
         self.diff_Gaussian('time')
         self.Gaussian_space_stack()
         self.diff_Gaussian('space')
-        # self.rank_filter_stack() #you have to load the entire raw data set for the dynamic part of this filter --> not so good for many time steps
+        # self.rank_filter_stack() #dask_image might provide rank-like filters soon you have to load the entire raw data set for the dynamic part of this filter --> not so good for many time steps
         self.time_stats() #does something similar like the dynamic rank filter, however only one pixel in space
         
         if self.loc_features:
-- 
GitLab