update jansen

weecology · Nov 12, 2024 · e425fbb · e425fbb
1 parent 6b3536a
commit e425fbb
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 22 deletions.
diff --git a/data_prep/DetectTree2.py b/data_prep/DetectTree2.py
@@ -0,0 +1,73 @@
+from deepforest.preprocess import split_raster, read_file
+from deepforest.visualize import plot_results
+import pandas as pd
+import geopandas as gpd
+import rasterio
+import numpy as np
+import random
+import os
+import cv2
+
+def clean_up_rgb():
+    """Convert the raw orthomosaic into a 3-band, 8-bit GeoTIFF.
+
+    Reads the source raster, keeps its first three bands, rescales them by
+    255/65535, zeroes NaN pixels and writes the result to a sibling file with
+    a ``_corrected`` suffix, 3 bands, uint8 dtype and nodata set to 0.
+
+    NOTE(review): the 65535 divisor assumes the source holds 16-bit values --
+    confirm against the raster's actual dtype.
+    """
+    rgb = "/orange/ewhite/DeepForest/DetectTree2/Sep_MA14_21_orthomosaic_20141023_reprojected_full_res.tif"
+    corrected = "/orange/ewhite/DeepForest/DetectTree2/Sep_MA14_21_orthomosaic_20141023_reprojected_full_res_corrected.tif"
+
+    # Context manager closes the source dataset as soon as the pixels and
+    # metadata have been copied out; the handle was previously never closed.
+    with rasterio.open(rgb) as src:
+        r = src.read()
+        meta = src.meta.copy()
+    print(r.shape)
+
+    r = r[:3, :, :]
+    r = r / 65535.0 * 255
+    # Set no data to 0
+    r[np.isnan(r)] = 0
+    # Cast to the dtype declared in the output metadata below; plain ``int``
+    # is a 64-bit type on most platforms and does not match a uint8 file.
+    # Clip first so out-of-range values saturate instead of wrapping around.
+    r = np.clip(r, 0, 255).astype(np.uint8)
+
+    # Save raster
+    meta.update(count=3, dtype=rasterio.uint8, nodata=0)
+    with rasterio.open(corrected, 'w', **meta) as dst:
+        dst.write(r)
+
+def generate():
+    """Tile the corrected orthomosaic and write crop-level annotations.
+
+    Loads both crown shapefiles, labels every geometry "Tree", drops invalid
+    geometries, and passes the combined table to ``split_raster``
+    (``patch_size=1500``, ``allow_empty=False``), which writes its crops under
+    ``pngs/``. The resulting table is saved to ``annotations.csv``.
+
+    NOTE(review): the raster read here (``RCD105_..._crop1_rgb_corrected.tif``)
+    is not the file that ``clean_up_rgb`` writes
+    (``Sep_..._full_res_corrected.tif``) -- confirm which one is intended.
+
+    Returns:
+        The table returned by ``split_raster``, with ``image_path`` expanded
+        to a full path under ``pngs/`` and a ``source`` column added.
+    """
+    base_dir = "/orange/ewhite/DeepForest/DetectTree2/"
+    crop_dir = "/orange/ewhite/DeepForest/DetectTree2/pngs/"
+    rgb = "/orange/ewhite/DeepForest/DetectTree2/RCD105_MA14_21_orthomosaic_20141023_reprojected_full_res_crop1_rgb_corrected.tif"
+    shps = ["/orange/ewhite/DeepForest/DetectTree2/sep_east.shp", "/orange/ewhite/DeepForest/DetectTree2/sep_west.shp"]
+
+    all_annotations = []
+    for shp in shps:
+        gdf = gpd.read_file(shp)
+        gdf["image_path"] = rgb
+        gdf["label"] = "Tree"
+        annotations = read_file(gdf)
+        annotations = annotations[annotations.is_valid]
+        # Second pass uses the bare file name plus root_dir instead of the
+        # absolute raster path.
+        annotations["image_path"] = os.path.basename(rgb)
+        annotations = read_file(annotations, root_dir=base_dir)
+        all_annotations.append(annotations)
+    all_annotations = pd.concat(all_annotations)
+
+    crop_annotations = split_raster(
+        all_annotations,
+        path_to_raster=rgb,
+        patch_size=1500,
+        allow_empty=False,
+        base_dir=crop_dir)
+
+    # Make full path. Build it from the crop table itself: the per-shapefile
+    # ``annotations`` frame still points at the raster, and pandas would align
+    # it by index, pairing crops with the wrong rows or with NaN.
+    crop_annotations["image_path"] = crop_dir + crop_annotations["image_path"]
+    crop_annotations["source"] = "Ball et al. 2023"
+    crop_annotations.to_csv("/orange/ewhite/DeepForest/DetectTree2/annotations.csv")
+
+    # Return the crop-level table: the caller looks these images up in pngs/.
+    return crop_annotations
+
+if __name__ == "__main__":
+    # Script entry point: rebuild the 8-bit raster, tile it, then plot a few
+    # of the returned annotations for a visual check.
+    clean_up_rgb()
+    crop_dir = "/orange/ewhite/DeepForest/DetectTree2/pngs/"
+    crops = generate()
+    # Keep only the file name; the directory is carried by root_dir instead.
+    crops["image_path"] = crops["image_path"].apply(os.path.basename)
+    crops.root_dir = crop_dir
+
+    # Plot up to 5 samples. random.sample raises ValueError when asked for
+    # more items than the population holds, so cap the request.
+    image_names = crops.image_path.unique().tolist()
+    images_to_plot = random.sample(image_names, min(5, len(image_names)))
+    for image in images_to_plot:
+        df_to_plot = crops[crops.image_path == image]
+        df_to_plot = read_file(df_to_plot)
+        df_to_plot.root_dir = crop_dir
+        image_file = os.path.join(crop_dir, df_to_plot.image_path.iloc[0])
+        pixels = cv2.imread(image_file)
+        # cv2.imread returns None instead of raising when a file cannot be
+        # read; fail with the offending path rather than on ``None.shape``.
+        if pixels is None:
+            raise FileNotFoundError(f"Could not read image: {image_file}")
+        height, width = pixels.shape[:2]
+        plot_results(df_to_plot, height=height, width=width)
diff --git a/data_prep/Jansen2023.py b/data_prep/Jansen2023.py
@@ -1,18 +1,17 @@
 import glob
-import os
 import pandas as pd
 from deepforest.utilities import read_file
+from deepforest.visualize import plot_results
 from deepforest.preprocess import split_raster
 import geopandas as gpd
 import rasterio as rio
 from rasterio.plot import show
 from matplotlib import pyplot as plt
+import os
 from shapely.geometry import box
-import shutil
 
 def Jansen_2023():
-    shps = glob.glob("/blue/ewhite/DeepForest/Jansen_2023/images/*.shp")
-    images = glob.glob("/blue/ewhite/DeepForest/Jansen_2023/images/*.tif")
+    shps = glob.glob("/orange/ewhite/DeepForest/Jansen_2023/images/*.shp")
 
     split_annotations = []
     for shp in shps:
@@ -23,39 +22,34 @@ def Jansen_2023():
         gdf["image_path"] = image
         gdf["label"] = "Tree"
 
-        # Confirm overlap
-        #src_bounds = rio.open(image).bounds
-        #fig, ax = plt.subplots(figsize=(10, 10))
-        #gpd.GeoSeries(box(*src_bounds)).plot(color="red", alpha=0.3, ax=ax)
-        #gdf.plot(ax=ax, alpha=0.3)
-        #plt.savefig("fig.png")
-
         annotations = read_file(input=gdf)
 
         split_annotations_1 = split_raster(
             annotations,
             path_to_raster=image,
             patch_size=2000,
             allow_empty=False, 
-            base_dir="/blue/ewhite/DeepForest/Jansen_2023/pngs"
+            base_dir="/orange/ewhite/DeepForest/Jansen_2023/pngs"
         )
         split_annotations.append(split_annotations_1)
 
     split_annotations = pd.concat(split_annotations)
     split_annotations = split_annotations[~(split_annotations.geometry.geom_type=="MultiPolygon")]
 
+    # view sample images
+    split_annotations.root_dir = "/orange/ewhite/DeepForest/Jansen_2023/pngs"
+
+    # Plot a sample image
+    sample = split_annotations.sample(1)
+    sample.root_dir = "/orange/ewhite/DeepForest/Jansen_2023/pngs"
+    width, height = rio.open(os.path.join(sample.root_dir, sample.image_path.values[0])).shape
+    plot_results(sample, height=height, width=width)
+    plt.savefig("current.png")
+
     # Add full path to images
-    split_annotations["image_path"] = split_annotations.image_path.apply(lambda x: "/blue/ewhite/DeepForest/Jansen_2023/pngs/{}".format(x))
-
-    # Split train test based on image path
-    split_images = split_annotations.image_path.unique()
-    train_images = split_images[0:int(len(split_images) * 0.8)]
-    test_images = [x for x in split_images if x not in train_images]
-
-    split_annotations["split"] = "train"
-    split_annotations.loc[split_annotations.image_path.isin(test_images), "split"] = "test"
+    split_annotations["image_path"] = split_annotations.image_path.apply(lambda x: "/orange/ewhite/DeepForest/Jansen_2023/pngs/{}".format(x))
     split_annotations["source"] = "Jansen et al. 2023"
-    split_annotations.to_csv("/blue/ewhite/DeepForest/Jansen_2023/pngs/annotations.csv")
+    split_annotations.to_csv("/orange/ewhite/DeepForest/Jansen_2023/pngs/annotations.csv")
 
 if __name__ == "__main__":
     Jansen_2023()
diff --git a/docs/public/Jansen_et_al._2023.png b/docs/public/Jansen_et_al._2023.png