angelolab · ngreenwald · Jun 24, 2022 · Jun 22, 2022 · Jun 22, 2022 · Jun 23, 2022
diff --git a/ark/phenotyping/som_utils.py b/ark/phenotyping/som_utils.py
@@ -748,7 +748,7 @@ def create_fov_pixel_data(fov, channels, img_data, seg_labels, pixel_norm_val,
 
 def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix,
                    img_sub_folder, is_mibitiff, channels, blur_factor,
-                   subset_proportion, pixel_norm_val, dtype, seed, fov):
+                   subset_proportion, pixel_norm_val, dtype, seed, channel_norm_df, fov):
     """Helper function to read in the FOV-level pixel data, run `create_fov_pixel_data`,
     and save the preprocessed data.
 
@@ -784,6 +784,8 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix
             The type to load the image segmentation labels in
         seed (int):
             The random seed to set for subsetting
+        channel_norm_df (pandas.DataFrame):
+            The per-channel normalization values, taken from the `norm_val` column
         fov (str):
             The name of the FOV to preprocess
 
@@ -819,6 +821,13 @@ def preprocess_fov(base_dir, tiff_dir, data_dir, subset_dir, seg_dir, seg_suffix
     # subset for the channel data
     img_data = img_xr.loc[fov, :, :, channels].values.astype(np.float32)
 
+    # build the normalization vector; assumes channel_norm_df rows are ordered like `channels`
+    norm_vect = channel_norm_df['norm_val'].values
+    norm_vect = np.array(norm_vect).reshape([1, 1, len(norm_vect)])
+
+    # normalize image data: divide by the norm_val vector, broadcast along the last axis
+    img_data = img_data / norm_vect
+
     # set seed for subsetting
     np.random.seed(seed)
 
@@ -928,7 +937,6 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
     channel_norm_path = os.path.join(base_dir, 'channel_norm.feather')
 
     if not os.path.exists(channel_norm_path):
-
         # compute channel percentiles
         channel_norm_df = calculate_channel_percentiles(tiff_dir=tiff_dir, fovs=fovs,
                                                         channels=channels,
@@ -961,7 +969,7 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
     fov_data_func = partial(
         preprocess_fov, base_dir, tiff_dir, data_dir, subset_dir,
         seg_dir, seg_suffix, img_sub_folder, is_mibitiff, channels, blur_factor,
-        subset_proportion, pixel_norm_val, dtype, seed
+        subset_proportion, pixel_norm_val, dtype, seed, channel_norm_df
     )
 
     # define the multiprocessing context

diff --git a/ark/phenotyping/som_utils_test.py b/ark/phenotyping/som_utils_test.py
@@ -1124,12 +1124,18 @@ def test_preprocess_fov():
             io.imsave(os.path.join(seg_dir, file_name), rand_img,
                       check_contrast=False)
 
+        # generate sample channel normalization values
+        channel_norm_df = pd.DataFrame.from_dict({
+            'channel': chans,
+            'norm_val': np.repeat(10, repeats=len(chans))
+        })
+
         # run the preprocessing for fov0
         # NOTE: don't test the return value, leave that for test_create_pixel_matrix
         som_utils.preprocess_fov(
             temp_dir, tiff_dir, 'pixel_mat_data', 'pixel_mat_subsetted',
             seg_dir, '_feature_0.tif', 'TIFs', False, ['chan0', 'chan1', 'chan2'],
-            2, 0.1, 1, 'int16', 42, 'fov0'
+            2, 0.1, 1, 'int16', 42, channel_norm_df, 'fov0'
         )
 
         fov_data_path = os.path.join(