From 5928f7cb0735920a5b0554b3597bb3844e645a02 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sat, 28 Sep 2024 08:53:24 -0400
Subject: [PATCH 01/18] swap uncorr and corr in flatfield corr

---
 resources/qc/ff_html_temp.html | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/resources/qc/ff_html_temp.html b/resources/qc/ff_html_temp.html
index 658b26d..ba6ee7a 100644
--- a/resources/qc/ff_html_temp.html
+++ b/resources/qc/ff_html_temp.html
@@ -43,13 +43,13 @@ <h2>Chunk - {{ chunk_num }} Channel - {{ loop.index0 }}</h2>
         <tr>
     {%- for image in channel %}
             <td>
-                <img src="{{ image.img_corr }}"></img>
-                <h3>Corrected</h3>
+                <img src="{{ image.img_uncorr }}"></img>
+                <h3>Uncorrected</h3>
                 <p>Slice-{{ image.slice }}</p>
             </td>
             <td>
-                <img src="{{ image.img_uncorr }}"></img>
-                <h3>Uncorrected</h3>
+                <img src="{{ image.img_corr }}"></img>
+                <h3>Corrected</h3>
                 <p>Slice-{{ image.slice }}</p>
             </td>
     {%- endfor %}
@@ -117,4 +117,4 @@ <h3>Uncorrected</h3>
             }
         })
     </script>
-</body>
\ No newline at end of file
+</body>

From 1ff3be8eb600f4d9c467acc28726fe788188550e Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sat, 28 Sep 2024 09:20:52 -0400
Subject: [PATCH 02/18] remove fiji from workflow

---
 README.md                              |   5 +-
 config/config.yml                      |   8 +-
 workflow/macros/AutostitchMacro.ijm    |  56 ---------
 workflow/macros/FuseImageMacroZarr.ijm |  47 -------
 workflow/rules/bigstitcher.smk         | 168 +------------------------
 workflow/rules/common.smk              |  46 -------
 6 files changed, 9 insertions(+), 321 deletions(-)
 delete mode 100644 workflow/macros/AutostitchMacro.ijm
 delete mode 100644 workflow/macros/FuseImageMacroZarr.ijm

diff --git a/README.md b/README.md
index e5d5b8d..018def7 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy
  - Python >= 3.11
  - Lightsheet data:
     - Raw Ultramicroscope Blaze OME TIFF files (include `blaze` in the acquisition tag)
+        - can be 2D or 3D TIFF files
     - Prestitched TIFF files (include `prestitched` in the acquisition tag)
 
 
@@ -63,10 +64,8 @@ or for snakemake<8.0, use:
 snakemake -c all --use-singularity 
 ```
 
-Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000`
+Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_stitching:mem_mb=60000 bigstitcher_fusion:mem_mb=100000`
 
 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/
 
-
-Alternate usage of this workflow (making use of conda) is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog?repo=khanlab/SPIMprep).
 <!--intro-end-->
diff --git a/config/config.yml b/config/config.yml
index edc16ba..ae096d2 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -36,12 +36,10 @@ bigstitcher:
     downsample_in_x: 4
     downsample_in_y: 4
     downsample_in_z: 1
-    method: "phase_corr" #unused
-    methods: #unused
+    methods: #unused, only for reference
       phase_corr: "Phase Correlation" 
       optical_flow: "Lucas-Kanade"  
   filter_pairwise_shifts:
-    enabled: 1 #unused
     min_r: 0.7
     max_shift_total: 50
   global_optimization:
@@ -64,7 +62,7 @@ bigstitcher:
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: sparkstitchedflatcorr
+  desc: stitchedflatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x 
     - 1 
@@ -155,5 +153,5 @@ report:
 
 
 containers:
-  spimprep: 'docker://khanlab/spimprep-deps:main'
+  spimprep: 'docker://khanlab/spimprep-deps:v0.1.0'
 
diff --git a/workflow/macros/AutostitchMacro.ijm b/workflow/macros/AutostitchMacro.ijm
deleted file mode 100644
index e9df4df..0000000
--- a/workflow/macros/AutostitchMacro.ijm
+++ /dev/null
@@ -1,56 +0,0 @@
-args = getArgument()
-args = split(args, " ");
-
-dataset_xml = args[0]
-pairwise_method = args[1]
-ds_x = args[2]
-ds_y = args[3]
-ds_z = args[4]
-do_filter = args[5]
-min_r = args[6]
-do_global = args[7]
-global_strategy = args[8]
-
-run("Calculate pairwise shifts ...",
- "select=" + dataset_xml + 
-    " process_angle=[All angles] " + 
-    " process_channel=[All channels] " + 
-    " process_illumination=[All illuminations] " + 
-    " process_tile=[All tiles] " + 
-    " process_timepoint=[All Timepoints] " + 
-    " method=["+ pairwise_method +"] " + 
-    " channels=[Average Channels] " + 
-    " downsample_in_x=" +  ds_x + 
-    " downsample_in_y=" +  ds_y + 
-    " downsample_in_z=" +  ds_z );
-
-
-if ( do_filter == 1 ){
-
-run("Filter pairwise shifts ...",
- "select=" + dataset_xml + 
-    " min_r=" + min_r + 
-    " max_r=1 " + 
-    " max_shift_in_x=0 " + 
-    " max_shift_in_y=0 " + 
-    " max_shift_in_z=0 " + 
-    " max_displacement=0");
-}
-
-if ( do_global == 1 ){
-
-run("Optimize globally and apply shifts ...",
- "select=" + dataset_xml + 
-    " process_angle=[All angles] " + 
-    " process_channel=[All channels] " + 
-    " process_illumination=[All illuminations] " + 
-    " process_tile=[All tiles] " + 
-    " process_timepoint=[All Timepoints] " + 
-    " relative=2.500 " + 
-    " absolute=3.500 " + 
-    " global_optimization_strategy=["+global_strategy+"] " + 
-    " fix_group_0-0,");
-}
-
-// quit after we are finished
-eval("script", "System.exit(0);");
diff --git a/workflow/macros/FuseImageMacroZarr.ijm b/workflow/macros/FuseImageMacroZarr.ijm
deleted file mode 100644
index 9d49b06..0000000
--- a/workflow/macros/FuseImageMacroZarr.ijm
+++ /dev/null
@@ -1,47 +0,0 @@
-args = getArgument()
-args = split(args, " ");
-
-in_dataset_xml = args[0]
-downsampling = args[1]
-channel = args[2]
-output_zarr = args[3]
-block_size_x = args[4]
-block_size_y = args[5]
-block_size_z = args[6]
-block_size_factor_x = args[7]
-block_size_factor_y = args[8]
-block_size_factor_z = args[9]
-
-run("Fuse dataset ...",
- "select=" + in_dataset_xml +
-    " process_angle=[All angles] " + 
-    " process_channel=[Single channel (Select from List)] " + 
-    " processing_channel=[channel " + channel + "]" +
-    " process_illumination=[All illuminations] " + 
-    " process_tile=[All tiles] " + 
-    " process_timepoint=[All Timepoints] " + 
-    " bounding_box=[Currently Selected Views] " + 
-    " preserve_original " +
-    " downsampling=" + downsampling + 
-    " interpolation=[Linear Interpolation] " + 
-    " pixel_type=[16-bit unsigned integer] " + 
-    " interest_points_for_non_rigid=[-= Disable Non-Rigid =-] " + 
-    " blend produce=[Each timepoint & channel] " + 
-    " fused_image=[ZARR/N5/HDF5 export using N5-API] " +
-    " define_input=[Auto-load from input data (values shown below)] " +
-    " export=ZARR create_0 " +
-    " zarr_dataset_path=" + output_zarr + 
-    " zarr_base_dataset=/ " +
-    " zarr_dataset_extension=/s0 " + 
-    " show_advanced_block_size_options " +
-    " block_size_x=" + block_size_x +
-    " block_size_y=" + block_size_y +
-    " block_size_z=" + block_size_z +
-    " block_size_factor_x=" + block_size_factor_x +
-    " block_size_factor_y=" + block_size_factor_y +
-    " block_size_factor_z=" + block_size_factor_z +
-    " subsampling_factors=[{ {1,1,1}}]"); 
-
-
-// quit after we are finished
-eval("script", "System.exit(0);");
diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk
index 4256e3b..24a4a3e 100644
--- a/workflow/rules/bigstitcher.smk
+++ b/workflow/rules/bigstitcher.smk
@@ -94,74 +94,7 @@ rule zarr_to_bdv:
         "../scripts/zarr_to_n5_bdv.py"
 
 
-rule bigstitcher:
-    input:
-        dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
-        dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
-        ijm=Path(workflow.basedir) / "macros" / "AutostitchMacro.ijm",
-    params:
-        fiji_launcher_cmd=get_fiji_launcher_cmd,
-        macro_args=get_macro_args_bigstitcher,
-        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
-    output:
-        launcher=temp(
-            bids(
-                root=work,
-                subject="{subject}",
-                datatype="micr",
-                sample="{sample}",
-                acq="{acq}",
-                desc="{desc}",
-                suffix="bigstitcherfiji.sh",
-            )
-        ),
-        dataset_xml=temp(
-            bids(
-                root=work,
-                subject="{subject}",
-                datatype="micr",
-                sample="{sample}",
-                acq="{acq}",
-                desc="{desc}",
-                suffix="bigstitcherfiji.xml",
-            )
-        ),
-    benchmark:
-        bids(
-            root="benchmarks",
-            datatype="bigstitcherfiji",
-            subject="{subject}",
-            sample="{sample}",
-            acq="{acq}",
-            desc="{desc}",
-            suffix="benchmark.tsv",
-        )
-    log:
-        bids(
-            root="logs",
-            datatype="bigstitcherfiji",
-            subject="{subject}",
-            sample="{sample}",
-            acq="{acq}",
-            desc="{desc}",
-            suffix="log.txt",
-        ),
-    container:
-        config["containers"]["spimprep"]
-    resources:
-        runtime=30,
-        mem_mb=10000,
-    threads: config["cores_per_rule"]
-    group:
-        "preproc"
-    shell:
-        "cp {input.dataset_xml} {output.dataset_xml} && "
-        " {params.fiji_launcher_cmd} && "
-        " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
-        " && {output.launcher} |& tee {log} && {params.rm_old_xml}"
-
-
-rule bigstitcher_spark_stitching:
+rule bigstitcher_stitching:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
@@ -230,10 +163,10 @@ rule bigstitcher_spark_stitching:
         "{params.rm_old_xml}"
 
 
-rule bigstitcher_spark_solver:
+rule bigstitcher_solver:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
-        dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml,
+        dataset_xml=rules.bigstitcher_stitching.output.dataset_xml,
     params:
         downsampling="--downsampling={dsx},{dsy},{dsz}".format(
             dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
@@ -295,7 +228,6 @@ rule bigstitcher_spark_solver:
         "{params.rm_old_xml}"
         #lambda 0.1 is default (can expose this if needed)
 
-
 rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
@@ -320,98 +252,6 @@ rule bigstitcher_fusion:
                 else "stitching"
             ),
         ),
-        ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
-    params:
-        fiji_launcher_cmd=get_fiji_launcher_cmd,
-        macro_args=get_macro_args_zarr_fusion,
-    output:
-        launcher=temp(
-            bids(
-                root=work,
-                subject="{subject}",
-                datatype="micr",
-                sample="{sample}",
-                acq="{acq}",
-                desc="stitched{desc}",
-                stain="{stain}",
-                suffix="fuseimagen5.sh",
-            )
-        ),
-        zarr=temp(
-            directory(
-                bids(
-                    root=work,
-                    subject="{subject}",
-                    datatype="micr",
-                    sample="{sample}",
-                    acq="{acq}",
-                    desc="stitched{desc}",
-                    stain="{stain}",
-                    suffix="SPIM.zarr",
-                )
-            )
-        ),
-    benchmark:
-        bids(
-            root="benchmarks",
-            datatype="fuse_dataset",
-            subject="{subject}",
-            sample="{sample}",
-            acq="{acq}",
-            desc="stitched{desc}",
-            stain="{stain}",
-            suffix="benchmark.tsv",
-        )
-    log:
-        bids(
-            root="logs",
-            datatype="fuse_dataset",
-            subject="{subject}",
-            sample="{sample}",
-            acq="{acq}",
-            desc="stitched{desc}",
-            stain="{stain}",
-            suffix="log.txt",
-        ),
-    container:
-        config["containers"]["spimprep"]
-    resources:
-        runtime=30,
-        mem_mb=40000,
-    threads: config["cores_per_rule"]
-    group:
-        "preproc"
-    shell:
-        " {params.fiji_launcher_cmd} && "
-        " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
-        " && {output.launcher} |& tee {log}"
-
-
-rule bigstitcher_spark_fusion:
-    input:
-        dataset_n5=bids(
-            root=work,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            desc="{desc}",
-            suffix="bdv.n5",
-        ),
-        dataset_xml=bids(
-            root=work,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            desc="{desc}",
-            suffix="bigstitcher{}.xml".format(
-                "solver"
-                if config["bigstitcher"]["global_optimization"]["enabled"]
-                else "stitching"
-            ),
-        ),
-        ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
         channel=lambda wildcards: "--channelId={channel}".format(
             channel=get_stains(wildcards).index(wildcards.stain)
@@ -438,7 +278,7 @@ rule bigstitcher_spark_fusion:
                     datatype="micr",
                     sample="{sample}",
                     acq="{acq}",
-                    desc="sparkstitched{desc}",
+                    desc="stitched{desc}",
                     stain="{stain}",
                     suffix="SPIM.zarr",
                 )
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 88d40e7..80a773a 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -257,52 +257,6 @@ def get_stains(wildcards):
     return df.iloc[0][stain_columns].dropna().tolist()
 
 
-# bigstitcher
-def get_fiji_launcher_cmd(wildcards, output, threads, resources):
-    launcher_opts_find = "-Xincgc"
-    launcher_opts_replace = f"-XX:+UseG1GC -verbose:gc -XX:+PrintGCDateStamps -XX:ActiveProcessorCount={threads}"
-    pipe_cmds = []
-    pipe_cmds.append("ImageJ-linux64 --dry-run --headless --console")
-    pipe_cmds.append(f"sed 's/{launcher_opts_find}/{launcher_opts_replace}'/")
-    pipe_cmds.append(
-        rf"sed 's/-Xmx[0-9a-z]\+/-Xmx{resources.mem_mb}m -Xms{resources.mem_mb}m/'"
-    )
-    pipe_cmds.append("tr --delete '\\n'")
-    return "|".join(pipe_cmds) + f" > {output.launcher} && chmod a+x {output.launcher} "
-
-
-def get_macro_args_bigstitcher(wildcards, input, output):
-    return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format(
-        dataset_xml=output.dataset_xml,
-        pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][
-            config["bigstitcher"]["calc_pairwise_shifts"]["method"]
-        ],
-        ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
-        ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
-        ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
-        do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"],
-        min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"],
-        do_global=config["bigstitcher"]["global_optimization"]["enabled"],
-        global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][
-            config["bigstitcher"]["global_optimization"]["strategy"]
-        ],
-    )
-
-
-def get_macro_args_zarr_fusion(wildcards, input, output):
-    return "{dataset_xml} {downsampling} {channel:02d} {output_zarr} {bsx} {bsy} {bsz} {bsfx} {bsfy} {bsfz}".format(
-        dataset_xml=input.dataset_xml,
-        downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
-        channel=get_stains(wildcards).index(wildcards.stain),
-        output_zarr=output.zarr,
-        bsx=config["bigstitcher"]["fuse_dataset"]["block_size_x"],
-        bsy=config["bigstitcher"]["fuse_dataset"]["block_size_y"],
-        bsz=config["bigstitcher"]["fuse_dataset"]["block_size_z"],
-        bsfx=config["bigstitcher"]["fuse_dataset"]["block_size_factor_x"],
-        bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
-        bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
-    )
-
 
 def get_output_ome_zarr_uri():
     if is_remote(config["root"]):

From 65adb787a9a7dc9d496a5f8e3ead8420b0fb6f94 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Thu, 12 Sep 2024 18:09:43 -0400
Subject: [PATCH 03/18] updates for compatibility with input zstacks

Blaze microscope now outputs data with one tif file per zstack.
This adds compatibility for it.

TODO: test full end-to-end
---
 config/config.yml                         |  2 +-
 workflow/lib/dask_image.py                | 45 ++++++++++++++++
 workflow/rules/import.smk                 |  9 ----
 workflow/scripts/blaze_to_metadata.py     | 64 ++++++++++++++++++-----
 workflow/scripts/blaze_to_metadata_gcs.py | 59 ++++++++++++++++-----
 workflow/scripts/tif_to_zarr.py           | 40 ++++++++++----
 workflow/scripts/tif_to_zarr_gcs.py       | 53 +++++++++++++++++--
 7 files changed, 221 insertions(+), 51 deletions(-)
 create mode 100644 workflow/lib/dask_image.py

diff --git a/config/config.yml b/config/config.yml
index ae096d2..9858eb7 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -1,6 +1,5 @@
 datasets: 'config/datasets.tsv'
 
-
 root: 'bids' # can use a s3:// or gcs:// prefix to write output to cloud storage
 work: 'work' 
 
@@ -13,6 +12,7 @@ cores_per_rule: 32
 #import wildcards:  tilex, tiley, channel, zslice (and prefix - unused)
 import_blaze:
   raw_tif_pattern: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}_xyz-Table Z{zslice}.ome.tif"
+  raw_tif_pattern_zstack:  "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}.ome.tif"
   intensity_rescaling: 0.5 #raw images seem to be at the upper end of uint16 (over-saturated) -- causes wrapping issues when adjusting with flatfield correction etc. this rescales the raw data as it imports it..
 
 import_prestitched:
diff --git a/workflow/lib/dask_image.py b/workflow/lib/dask_image.py
new file mode 100644
index 0000000..dbc06c3
--- /dev/null
+++ b/workflow/lib/dask_image.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+import os
+from glob import glob
+
+try:
+    import tifffile
+except (AttributeError, ImportError):
+    pass
+
+from dask.array.core import Array
+from dask.base import tokenize
+
+
+def add_leading_dimension(x):
+    return x[None, ...]
+
+def imread(fn, page):
+    return tifffile.imread(fn,key=page)
+
+
+def imread_pages(filename, preprocess=None):
+
+    
+    tif = tifffile.TiffFile(filename)
+    pages = [i for i in range(len(tif.pages))]
+    name = "imread-%s" % tokenize(filename, os.path.getmtime(filename))
+
+    sample = tif.pages[0].asarray()
+
+    if preprocess:
+        sample = preprocess(sample)
+
+    keys = [(name, i) + (0,) * len(sample.shape) for i in pages]
+    if preprocess:
+        values = [
+            (add_leading_dimension, (preprocess, (imread, filename,i))) for i in pages
+        ]
+    else:
+        values = [(add_leading_dimension, (imread, filename,i)) for i in pages]
+    dsk = dict(zip(keys, values))
+
+    chunks = ((1,) * len(pages),) + tuple((d,) for d in sample.shape)
+
+    return Array(dsk, name, chunks, sample.dtype)
diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 8a9074a..3906b36 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -79,11 +79,6 @@ rule blaze_to_metadata_gcs:
 rule blaze_to_metadata:
     input:
         ome_dir=get_input_dataset,
-    params:
-        in_tif_pattern=lambda wildcards, input: os.path.join(
-            input.ome_dir,
-            config["import_blaze"]["raw_tif_pattern"],
-        ),
     output:
         metadata_json=temp(
             bids(
@@ -197,10 +192,6 @@ rule tif_to_zarr:
         ome_dir=get_input_dataset,
         metadata_json=rules.copy_blaze_metadata.output.metadata_json,
     params:
-        in_tif_pattern=lambda wildcards, input: os.path.join(
-            input.ome_dir,
-            config["import_blaze"]["raw_tif_pattern"],
-        ),
         intensity_rescaling=config["import_blaze"]["intensity_rescaling"],
     output:
         zarr=temp(
diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py
index 608158e..196dac5 100644
--- a/workflow/scripts/blaze_to_metadata.py
+++ b/workflow/scripts/blaze_to_metadata.py
@@ -4,25 +4,48 @@
 import re
 from itertools import product
 from snakemake.io import glob_wildcards
+from glob import glob
+from pathlib import Path
+import os
 
-in_tif_pattern = snakemake.params.in_tif_pattern
+tif_files = glob(f"{snakemake.input.ome_dir}/*.tif")
+
+#check first tif file to see if it is zstack or not:
+if 'xyz-Table Z' in Path(tif_files[0]).name:
+    is_zstack=False
+else:
+    is_zstack=True
+
+if is_zstack:
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+else:
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
 
 #add a wildcard constraint to ensure no
 #subfolders get parsed (ie don't match anything with / in it):
 prefix_constraint=r'[^/]+' 
 in_tif_pattern_constrained = in_tif_pattern.replace('{prefix}',f'{{prefix,{prefix_constraint}}}')
 
+
 #parse the filenames to get number of channels, tiles etc..
-prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained)
+if is_zstack:
+    prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files)
+else:
+    prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files)
 
 tiles_x = sorted(list(set(tilex)))
 tiles_y = sorted(list(set(tiley)))
 channels = sorted(list(set(channel)))
-zslices = sorted(list(set(zslice)))
 prefixes = sorted(list(set(prefix)))
 
-#read in series metadata from first file
-in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
+if not is_zstack:
+    zslices = sorted(list(set(zslice)))
+
+    #read in series metadata from first file
+    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
+
+else:
+    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0])
 
 raw_tif = tifffile.TiffFile(in_tif,mode='r')
 
@@ -37,12 +60,18 @@
 custom_metadata = ome_dict['OME']['Image']['ca:CustomAttributes']
 
 
-
 #read tile configuration from the microscope metadata
 if axes == 'CZYX':
-    tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
+    if is_zstack:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
+    else:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
 elif axes == 'ZYX': 
-    tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
+    if is_zstack:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
+    else:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
+
 
 tile_pattern = re.compile(tile_config_pattern)
 
@@ -58,23 +87,29 @@
     chunks.append(chunk)
 
 for line in  custom_metadata['TileConfiguration']['@TileConfiguration'].split('  ')[1:]:
-    
     d = re.search(tile_pattern,line).groupdict()
     chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
-    
-    #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
-    key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
+   
+    if is_zstack:
+        key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
+    else:
+        #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
+        key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
 
     map_x[key] = float(d['x'])
     map_y[key] = float(d['y'])
-    map_z[key] = float(d['z'])
+    if is_zstack:
+        map_z[key] = float(0)
+    else:
+        map_z[key] = float(d['z'])
     
 
 metadata={}
 metadata['tiles_x'] = tiles_x
 metadata['tiles_y'] = tiles_y
 metadata['channels'] = channels
-metadata['zslices'] = zslices
+if not is_zstack:
+    metadata['zslices'] = zslices
 metadata['prefixes'] = prefixes
 metadata['chunks'] = chunks
 metadata['axes'] = axes
@@ -88,6 +123,7 @@
 metadata['ome_full_metadata'] = ome_dict
 metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
 metadata['PixelSizeUnits'] = 'mm' 
+metadata['is_zstack'] = is_zstack
 
 #write metadata to json
 with open(snakemake.output.metadata_json, 'w') as fp:
diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py
index 7817d87..cff5302 100644
--- a/workflow/scripts/blaze_to_metadata_gcs.py
+++ b/workflow/scripts/blaze_to_metadata_gcs.py
@@ -5,12 +5,11 @@
 import os
 from itertools import product
 from snakemake.io import glob_wildcards
+from pathlib import Path
 import gcsfs
 from lib.cloud_io import get_fsspec
 
 dataset_uri = snakemake.params.dataset_path
-in_tif_pattern = snakemake.params.in_tif_pattern
-
 
 gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project,
                         'token': snakemake.input.creds}
@@ -18,18 +17,38 @@
 
 tifs = fs.glob(f"{dataset_uri}/*.tif")
 
+#check first tif file to see if it is zstack or not:
+if 'xyz-Table Z' in Path(tifs[0]).name:
+    is_zstack=False
+else:
+    is_zstack=True
+
+if is_zstack:
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+else:
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
+
+
+
 #parse the filenames to get number of channels, tiles etc..
-prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
+if is_zstack:
+    prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs)
+else:
+    prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
+    zslices = sorted(list(set(zslice)))
 
 tiles_x = sorted(list(set(tilex)))
 tiles_y = sorted(list(set(tiley)))
 channels = sorted(list(set(channel)))
-zslices = sorted(list(set(zslice)))
 prefixes = sorted(list(set(prefix)))
 
+if not is_zstack:
+    zslices = sorted(list(set(zslice)))
 
-#read in series metadata from first file
-in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
+    #read in series metadata from first file
+    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
+else:
+    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0])
 
 with fs.open(f"gcs://{in_tif}", 'rb') as tif_file:
     raw_tif = tifffile.TiffFile(tif_file,mode='r')
@@ -49,9 +68,15 @@
 
 #read tile configuration from the microscope metadata
 if axes == 'CZYX':
-    tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
+    if is_zstack:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
+    else:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
 elif axes == 'ZYX': 
-    tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
+    if is_zstack:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
+    else:
+        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
 
 tile_pattern = re.compile(tile_config_pattern)
 
@@ -71,19 +96,28 @@
     d = re.search(tile_pattern,line).groupdict()
     chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
     
-    #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
-    key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
+    if is_zstack:
+        key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
+    else:
+        #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
+        key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
 
     map_x[key] = float(d['x'])
     map_y[key] = float(d['y'])
-    map_z[key] = float(d['z'])
+    if is_zstack:
+        map_z[key] = float(0)
+    else:
+        map_z[key] = float(d['z'])
     
 
+       
+
 metadata={}
 metadata['tiles_x'] = tiles_x
 metadata['tiles_y'] = tiles_y
 metadata['channels'] = channels
-metadata['zslices'] = zslices
+if not is_zstack:
+    metadata['zslices'] = zslices
 metadata['prefixes'] = prefixes
 metadata['chunks'] = chunks
 metadata['axes'] = axes
@@ -97,6 +131,7 @@
 metadata['ome_full_metadata'] = ome_dict
 metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
 metadata['PixelSizeUnits'] = 'mm' 
+metadata['is_zstack'] = is_zstack
 
 #write metadata to json
 with open(snakemake.output.metadata_json, 'w') as fp:
diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py
index bfd4a86..d00d461 100644
--- a/workflow/scripts/tif_to_zarr.py
+++ b/workflow/scripts/tif_to_zarr.py
@@ -1,7 +1,9 @@
 import tifffile
+import os
 import json
 import dask.array as da
-import dask.array.image 
+from dask.array.image import imread as imread_tifs
+from lib.dask_image import imread_pages
 from itertools import product
 from dask.diagnostics import ProgressBar
 
@@ -21,14 +23,24 @@ def single_imread(*args):
     return tifffile.imread(*args,key=0)
 
 
-#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke
-in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern))
-
 
 #read metadata json
 with open(snakemake.input.metadata_json) as fp:
     metadata = json.load(fp)
 
+
+is_zstack = metadata['is_zstack']
+
+if is_zstack:
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+else:
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
+
+
+#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke
+in_tif_glob = replace_square_brackets(str(in_tif_pattern))
+
+
 #TODO: put these in top-level metadata for easier access..
 size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']
 size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']
@@ -36,31 +48,39 @@ def single_imread(*args):
 size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']
 size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
 
-
 #now get the first channel and first zslice tif 
 tiles=[]
 for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
-        
+    print(f'tile {tilex}x{tiley}, {i_tile}') 
     zstacks=[]
     for i_chan,channel in enumerate(metadata['channels']):
 
-            
-        zstacks.append(dask.array.image.imread(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))
-        
+        print(f'channel {i_chan}')
+
+        if is_zstack:
+            tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
+
+            zstacks.append(imread_pages(tif_file))
+            print(zstacks[-1].shape)
+        else:
+            zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))
 
     #have list of zstack dask arrays for the tile, one for each channel
     #stack them up and append to list of tiles
     tiles.append(da.stack(zstacks))
 
 
+
 #now we have list of tiles, each a dask array
 #stack them up to get our final array
 darr = da.stack(tiles)
 
+print(darr.shape)
+print(darr.chunksize)
+
 #rescale intensities, and recast 
 darr = darr * snakemake.params.intensity_rescaling
 darr = darr.astype('uint16')
-
 #now we can do the computation itself, storing to zarr
 print('writing images to zarr with dask')
 with ProgressBar():
diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 01a9af1..9c234c2 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -4,6 +4,7 @@
 import dask.array.image 
 from itertools import product
 from dask.diagnostics import ProgressBar
+from lib.dask_image import imread_pages
 import gcsfs
 
 gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project,
@@ -20,6 +21,19 @@ def replace_square_brackets(pattern):
     pattern = pattern.replace('##RIGHTBRACKET##','[]]')
     return pattern
 
+def get_zstack_metadata(gcs_uri,fs):
+    with fs.open(gcs_uri, 'rb') as file:
+        tif = tifffile.TiffFile(file)
+        pages = tif.pages
+        n_z = len(pages)
+        sample = tif.pages[0].asarray()
+        return {'n_z': n_z, 'shape': sample.shape, 'dtype': sample.dtype}
+
+
+def get_tiff_num_pages(fs,gcs_uri):
+    with fs.open(gcs_uri, 'rb') as file:
+        return len(tifffile.TiffFile(file).pages)
+
 def read_tiff_slice(fs,gcs_uri, key=0):
     """Read a single TIFF slice from GCS."""
     with fs.open(gcs_uri, 'rb') as file:
@@ -37,16 +51,38 @@ def build_zstack(gcs_uris,fs):
     # Convert the list of delayed objects into a Dask array
     return da.stack([da.from_delayed(lazy_array, shape=sample_array.shape, dtype=dtype) for lazy_array in lazy_arrays], axis=0)
 
+def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
+    """Build a z-stack from a single GCS URI  """
 
+    pages = [i for i in range(zstack_metadata['n_z'])]
+    lazy_arrays = [
+        dask.delayed(read_tiff_slice)(fs,gcs_uri,page) for page in pages
+    ]
+
+    # Convert the list of delayed objects into a Dask array
+    return da.stack([da.from_delayed(lazy_array, shape=zstack_metadata['shape'], dtype=zstack_metadata['dtype']) for lazy_array in lazy_arrays], axis=0)
 
-#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke
-in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern))
 
 
 #read metadata json
 with open(snakemake.input.metadata_json) as fp:
     metadata = json.load(fp)
 
+
+is_zstack = metadata['is_zstack']
+
+if is_zstack:
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+else:
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
+
+
+
+#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke
+in_tif_glob = replace_square_brackets(str(in_tif_pattern))
+
+
+
 #TODO: put these in top-level metadata for easier access..
 size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']
 size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']
@@ -56,14 +92,21 @@ def build_zstack(gcs_uris,fs):
 
 
 #now get the first channel and first zslice tif 
+
+if is_zstack:
+    #prepopulate this since its the same for all tiles, and takes time to query
+    zstack_metadata = get_zstack_metadata('gcs://'+in_tif_pattern.format(tilex=metadata['tiles_x'][0],tiley=metadata['tiles_y'][0],prefix=metadata['prefixes'][0],channel=metadata['channels'][0]),fs=fs)
+
 tiles=[]
 for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
         
     zstacks=[]
     for i_chan,channel in enumerate(metadata['channels']):
-
-            
-        zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))
+    
+        if is_zstack:            
+            zstacks.append(build_zstack_from_single('gcs://'+in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel),fs=fs,zstack_metadata=zstack_metadata))
+        else:
+            zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))
         
 
     #have list of zstack dask arrays for the tile, one for each channel

From 1ebbb745ab97f16d5aee333999f867542c0a54e5 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sat, 28 Sep 2024 07:32:46 -0400
Subject: [PATCH 04/18] use pyvips for tiff reading on zstacks

todo: do the same for gcs version
---
 workflow/scripts/tif_to_zarr.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py
index d00d461..4d61196 100644
--- a/workflow/scripts/tif_to_zarr.py
+++ b/workflow/scripts/tif_to_zarr.py
@@ -1,7 +1,9 @@
 import tifffile
 import os
 import json
+import pyvips
 import dask.array as da
+from dask.delayed import delayed
 from dask.array.image import imread as imread_tifs
 from lib.dask_image import imread_pages
 from itertools import product
@@ -23,6 +25,10 @@ def single_imread(*args):
     return tifffile.imread(*args,key=0)
 
 
+def read_page_as_numpy(tif_file,page):    
+    """gets a single page (i.e. 2d image) from a tif file zstack"""
+    return pyvips.Image.new_from_file(tif_file, page=page).numpy()
+
 
 #read metadata json
 with open(snakemake.input.metadata_json) as fp:
@@ -42,10 +48,10 @@ def single_imread(*args):
 
 
 #TODO: put these in top-level metadata for easier access..
-size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']
-size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']
-size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']
-size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']
+size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'])
+size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'])
+size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'])
+size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'])
 size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
 
 #now get the first channel and first zslice tif 
@@ -59,8 +65,13 @@ def single_imread(*args):
 
         if is_zstack:
             tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
-
-            zstacks.append(imread_pages(tif_file))
+            
+            pages=[]
+            #read each page
+            for i_z in range(size_z):
+                pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))
+            
+            zstacks.append(da.stack(pages))
             print(zstacks[-1].shape)
         else:
             zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))

From 78fe03f53e97b227ba4db9f503b90e57c1747b41 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sat, 28 Sep 2024 10:12:12 -0400
Subject: [PATCH 05/18] WIP: compatibility for 1x1 blaze acquisitions (1 tile)

- disable fieldfrac? skip stitching?
---
 workflow/scripts/blaze_to_metadata.py | 142 ++++++++++++++++----------
 workflow/scripts/tif_to_zarr.py       |  80 ++++++++++-----
 2 files changed, 138 insertions(+), 84 deletions(-)

diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py
index 196dac5..6eb88cd 100644
--- a/workflow/scripts/blaze_to_metadata.py
+++ b/workflow/scripts/blaze_to_metadata.py
@@ -16,10 +16,19 @@
 else:
     is_zstack=True
 
-if is_zstack:
-    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+#now check to see if there is just single tile
+if ' x ' in Path(tif_files[0]).name:
+    is_tiled=True
 else:
-    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
+    is_tiled=False
+
+if is_tiled:
+    if is_zstack:
+        in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+    else:
+        in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
+else:
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"])
 
 #add a wildcard constraint to ensure no
 #subfolders get parsed (ie don't match anything with / in it):
@@ -28,24 +37,34 @@
 
 
 #parse the filenames to get number of channels, tiles etc..
-if is_zstack:
-    prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files)
+if is_tiled:
+    if is_zstack:
+        prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files)
+    else:
+        prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files)
 else:
-    prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files)
+    prefix, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files)
+
+if is_tiled:
+    tiles_x = sorted(list(set(tilex)))
+    tiles_y = sorted(list(set(tiley)))
 
-tiles_x = sorted(list(set(tilex)))
-tiles_y = sorted(list(set(tiley)))
 channels = sorted(list(set(channel)))
 prefixes = sorted(list(set(prefix)))
 
+
 if not is_zstack:
     zslices = sorted(list(set(zslice)))
 
-    #read in series metadata from first file
-    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
-
+if is_tiled:
+    if is_zstack:
+        in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0])
+    else:
+        #read in series metadata from first file
+        in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
 else:
-    in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0])
+    in_tif = in_tif_pattern.format(prefix=prefixes[0],channel=channels[0],zslice=zslices[0])
+    
 
 raw_tif = tifffile.TiffFile(in_tif,mode='r')
 
@@ -62,68 +81,79 @@
 
 #read tile configuration from the microscope metadata
 if axes == 'CZYX':
-    if is_zstack:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
+    if is_tiled:
+        if is_zstack:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
+        else:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
     else:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
+        print('single tile, axes=CZYX')
+            
 elif axes == 'ZYX': 
-    if is_zstack:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
+    if is_tiled:
+        if is_zstack:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
+        else:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
     else:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
-
-
-tile_pattern = re.compile(tile_config_pattern)
-
-#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
-map_x=dict()
-map_y=dict()
-map_z=dict()
-
-map_tiles_to_chunk=dict()
-chunks = []
-for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
-    map_tiles_to_chunk[tilex+tiley] = chunk
-    chunks.append(chunk)
-
-for line in  custom_metadata['TileConfiguration']['@TileConfiguration'].split('  ')[1:]:
-    d = re.search(tile_pattern,line).groupdict()
-    chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
-   
-    if is_zstack:
-        key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
-    else:
-        #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
-        key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
-
-    map_x[key] = float(d['x'])
-    map_y[key] = float(d['y'])
-    if is_zstack:
-        map_z[key] = float(0)
-    else:
-        map_z[key] = float(d['z'])
-    
+        print('single tile, axes=ZYX')
+
+if is_tiled:
+    tile_pattern = re.compile(tile_config_pattern)
+
+    #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
+    map_x=dict()
+    map_y=dict()
+    map_z=dict()
+
+    map_tiles_to_chunk=dict()
+    chunks = []
+    for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
+        map_tiles_to_chunk[tilex+tiley] = chunk
+        chunks.append(chunk)
+
+    for line in  custom_metadata['TileConfiguration']['@TileConfiguration'].split('  ')[1:]:
+        d = re.search(tile_pattern,line).groupdict()
+        chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
+       
+        if is_zstack:
+            key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
+        else:
+            #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
+            key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
+
+        map_x[key] = float(d['x'])
+        map_y[key] = float(d['y'])
+        if is_zstack:
+            map_z[key] = float(0)
+        else:
+            map_z[key] = float(d['z'])
+        
 
 metadata={}
-metadata['tiles_x'] = tiles_x
-metadata['tiles_y'] = tiles_y
+if is_tiled:
+    metadata['tiles_x'] = tiles_x
+    metadata['tiles_y'] = tiles_y
+    metadata['chunks'] = chunks
+    metadata['lookup_tile_offset_x'] = map_x
+    metadata['lookup_tile_offset_y'] = map_y
+    metadata['lookup_tile_offset_z'] = map_z
+
 metadata['channels'] = channels
+
 if not is_zstack:
     metadata['zslices'] = zslices
 metadata['prefixes'] = prefixes
-metadata['chunks'] = chunks
 metadata['axes'] = axes
 metadata['shape'] = shape
 metadata['physical_size_x'] = float(physical_size_x)
 metadata['physical_size_y'] = float(physical_size_y)
 metadata['physical_size_z'] = float(physical_size_z)
-metadata['lookup_tile_offset_x'] = map_x
-metadata['lookup_tile_offset_y'] = map_y
-metadata['lookup_tile_offset_z'] = map_z
 metadata['ome_full_metadata'] = ome_dict
 metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
 metadata['PixelSizeUnits'] = 'mm' 
 metadata['is_zstack'] = is_zstack
+metadata['is_tiled'] = is_tiled
 
 #write metadata to json
 with open(snakemake.output.metadata_json, 'w') as fp:
diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py
index 4d61196..034a9cb 100644
--- a/workflow/scripts/tif_to_zarr.py
+++ b/workflow/scripts/tif_to_zarr.py
@@ -36,11 +36,16 @@ def read_page_as_numpy(tif_file,page):
 
 
 is_zstack = metadata['is_zstack']
+is_tiled = metadata['is_tiled']
 
-if is_zstack:
-    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+if is_tiled:
+    if is_zstack:
+        in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"])
+    else:
+        in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
 else:
-    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"])
+    in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"])
+
 
 
 #use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke
@@ -52,39 +57,58 @@ def read_page_as_numpy(tif_file,page):
 size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'])
 size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'])
 size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'])
-size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
 
-#now get the first channel and first zslice tif 
-tiles=[]
-for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
-    print(f'tile {tilex}x{tiley}, {i_tile}') 
+
+
+
+if is_tiled:
+
+    size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
+
+    #now get the first channel and first zslice tif 
+    tiles=[]
+    for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
+        print(f'tile {tilex}x{tiley}, {i_tile}') 
+        zstacks=[]
+        for i_chan,channel in enumerate(metadata['channels']):
+
+            print(f'channel {i_chan}')
+
+            if is_zstack:
+                tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
+                
+                pages=[]
+                #read each page
+                for i_z in range(size_z):
+                    pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))
+                
+                zstacks.append(da.stack(pages))
+                print(zstacks[-1].shape)
+            else:
+                zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))
+
+        #have list of zstack dask arrays for the tile, one for each channel
+        #stack them up and append to list of tiles
+        tiles.append(da.stack(zstacks))
+
+
+
+    #now we have list of tiles, each a dask array
+    #stack them up to get our final array
+    darr = da.stack(tiles)
+
+else:
+    #single tile, zslices:
     zstacks=[]
     for i_chan,channel in enumerate(metadata['channels']):
 
         print(f'channel {i_chan}')
+        zstacks.append(imread_tifs(in_tif_glob.format(prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))
 
-        if is_zstack:
-            tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
-            
-            pages=[]
-            #read each page
-            for i_z in range(size_z):
-                pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))
-            
-            zstacks.append(da.stack(pages))
-            print(zstacks[-1].shape)
-        else:
-            zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread))
-
-    #have list of zstack dask arrays for the tile, one for each channel
-    #stack them up and append to list of tiles
-    tiles.append(da.stack(zstacks))
-
+    #stack the channels up
+    darr = da.stack(zstacks)
 
 
-#now we have list of tiles, each a dask array
-#stack them up to get our final array
-darr = da.stack(tiles)
 
 print(darr.shape)
 print(darr.chunksize)

From 4d1b90eeeace555323720d59ba048b85d08bd488 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sun, 29 Sep 2024 16:04:21 -0400
Subject: [PATCH 06/18] fix for tileconfig key zstack 0->0000

---
 workflow/scripts/blaze_to_metadata.py     | 2 +-
 workflow/scripts/blaze_to_metadata_gcs.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py
index 6eb88cd..25a27e3 100644
--- a/workflow/scripts/blaze_to_metadata.py
+++ b/workflow/scripts/blaze_to_metadata.py
@@ -117,7 +117,7 @@
         chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
        
         if is_zstack:
-            key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
+            key = f"tile-{chunk}_chan-{d['channel']}_z-0000" 
         else:
             #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
             key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py
index cff5302..fac9747 100644
--- a/workflow/scripts/blaze_to_metadata_gcs.py
+++ b/workflow/scripts/blaze_to_metadata_gcs.py
@@ -97,7 +97,7 @@
     chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
     
     if is_zstack:
-        key = f"tile-{chunk}_chan-{d['channel']}_z-0" 
+        key = f"tile-{chunk}_chan-{d['channel']}_z-0000" 
     else:
         #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
         key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 

From a2ce2186633811c642a2f5272c2f447cf6dfe80c Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Tue, 1 Oct 2024 13:05:21 -0400
Subject: [PATCH 07/18] update the tif_to_zarr_gcs for tifzstacks on google
 cloud

---
 workflow/scripts/blaze_to_metadata_gcs.py | 139 +++++++++++++---------
 workflow/scripts/tif_to_zarr_gcs.py       |  99 +++++++++------
 2 files changed, 147 insertions(+), 91 deletions(-)

diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py
index fac9747..b652170 100644
--- a/workflow/scripts/blaze_to_metadata_gcs.py
+++ b/workflow/scripts/blaze_to_metadata_gcs.py
@@ -23,25 +23,44 @@
 else:
     is_zstack=True
 
-if is_zstack:
-    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+#now check to see if there is just single tile
+if ' x ' in Path(tifs[0]).name:
+    is_tiled=True
 else:
-    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
+    is_tiled=False
+
+
+
+if is_tiled:
+    if is_zstack:
+        in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+    else:
+        in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
+else:
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"]
 
 
 
 #parse the filenames to get number of channels, tiles etc..
-if is_zstack:
-    prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs)
+if is_tiled:
+    if is_zstack:
+        prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs)
+    else:
+        prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
 else:
-    prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
-    zslices = sorted(list(set(zslice)))
+    prefix, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
+
+if is_tiled:
+    tiles_x = sorted(list(set(tilex)))
+    tiles_y = sorted(list(set(tiley)))
 
-tiles_x = sorted(list(set(tilex)))
-tiles_y = sorted(list(set(tiley)))
 channels = sorted(list(set(channel)))
 prefixes = sorted(list(set(prefix)))
 
+if not is_zstack:
+    zslices = sorted(list(set(zslice)))
+
+
 if not is_zstack:
     zslices = sorted(list(set(zslice)))
 
@@ -68,70 +87,80 @@
 
 #read tile configuration from the microscope metadata
 if axes == 'CZYX':
-    if is_zstack:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
-    else:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
-elif axes == 'ZYX': 
-    if is_zstack:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
-    else:
-        tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
-
-tile_pattern = re.compile(tile_config_pattern)
-
-#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
-map_x=dict()
-map_y=dict()
-map_z=dict()
-
-map_tiles_to_chunk=dict()
-chunks = []
-for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
-    map_tiles_to_chunk[tilex+tiley] = chunk
-    chunks.append(chunk)
-
-for line in  custom_metadata['TileConfiguration']['@TileConfiguration'].split('  ')[1:]:
-    
-    d = re.search(tile_pattern,line).groupdict()
-    chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
-    
-    if is_zstack:
-        key = f"tile-{chunk}_chan-{d['channel']}_z-0000" 
+    if is_tiled:
+        if is_zstack:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
+        else:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
     else:
-        #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
-        key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
+        print('single tile, axes=CZYX')
 
-    map_x[key] = float(d['x'])
-    map_y[key] = float(d['y'])
-    if is_zstack:
-        map_z[key] = float(0)
+elif axes == 'ZYX': 
+    if is_tiled:
+        if is_zstack:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
+        else:
+            tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
     else:
-        map_z[key] = float(d['z'])
-    
-
-       
+        print('single tile, axes=ZYX')
+
+if is_tiled:
+    tile_pattern = re.compile(tile_config_pattern)
+
+    #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
+    map_x=dict()
+    map_y=dict()
+    map_z=dict()
+
+    map_tiles_to_chunk=dict()
+    chunks = []
+    for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
+        map_tiles_to_chunk[tilex+tiley] = chunk
+        chunks.append(chunk)
+
+    for line in  custom_metadata['TileConfiguration']['@TileConfiguration'].split('  ')[1:]:
+        
+        d = re.search(tile_pattern,line).groupdict()
+        chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first
+        
+        if is_zstack:
+            key = f"tile-{chunk}_chan-{d['channel']}_z-0000" 
+        else:
+            #key is:  tile-{chunk}_chan-{channel}_z-{zslice} 
+            key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" 
+
+        map_x[key] = float(d['x'])
+        map_y[key] = float(d['y'])
+        if is_zstack:
+            map_z[key] = float(0)
+        else:
+            map_z[key] = float(d['z'])
+        
 
 metadata={}
-metadata['tiles_x'] = tiles_x
-metadata['tiles_y'] = tiles_y
+if is_tiled:
+    metadata['tiles_x'] = tiles_x
+    metadata['tiles_y'] = tiles_y
+    metadata['chunks'] = chunks
+    metadata['lookup_tile_offset_x'] = map_x
+    metadata['lookup_tile_offset_y'] = map_y
+    metadata['lookup_tile_offset_z'] = map_z
+
 metadata['channels'] = channels
+
 if not is_zstack:
     metadata['zslices'] = zslices
 metadata['prefixes'] = prefixes
-metadata['chunks'] = chunks
 metadata['axes'] = axes
 metadata['shape'] = shape
 metadata['physical_size_x'] = float(physical_size_x)
 metadata['physical_size_y'] = float(physical_size_y)
 metadata['physical_size_z'] = float(physical_size_z)
-metadata['lookup_tile_offset_x'] = map_x
-metadata['lookup_tile_offset_y'] = map_y
-metadata['lookup_tile_offset_z'] = map_z
 metadata['ome_full_metadata'] = ome_dict
 metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
 metadata['PixelSizeUnits'] = 'mm' 
 metadata['is_zstack'] = is_zstack
+metadata['is_tiled'] = is_tiled
 
 #write metadata to json
 with open(snakemake.output.metadata_json, 'w') as fp:
diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 9c234c2..ca9ea11 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -1,6 +1,7 @@
 import tifffile
 import json
 import dask.array as da
+from dask.delayed import delayed
 import dask.array.image 
 from itertools import product
 from dask.diagnostics import ProgressBar
@@ -21,14 +22,6 @@ def replace_square_brackets(pattern):
     pattern = pattern.replace('##RIGHTBRACKET##','[]]')
     return pattern
 
-def get_zstack_metadata(gcs_uri,fs):
-    with fs.open(gcs_uri, 'rb') as file:
-        tif = tifffile.TiffFile(file)
-        pages = tif.pages
-        n_z = len(pages)
-        sample = tif.pages[0].asarray()
-        return {'n_z': n_z, 'shape': sample.shape, 'dtype': sample.dtype}
-
 
 def get_tiff_num_pages(fs,gcs_uri):
     with fs.open(gcs_uri, 'rb') as file:
@@ -39,6 +32,19 @@ def read_tiff_slice(fs,gcs_uri, key=0):
     with fs.open(gcs_uri, 'rb') as file:
         return tifffile.imread(file, key=key)
 
+def read_page_as_numpy(tif_file_uri, page, fs=None):
+    """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI."""
+    
+    # Open the file from the cloud storage using fsspec
+    with fs.open(tif_file_uri, 'rb') as f:
+        # Read the file content into a buffer
+        file_buffer = f.read()
+    
+    # Use pyvips to read from the buffer
+    return pyvips.Image.new_from_buffer(file_buffer, "", page=page).numpy()
+
+
+
 def build_zstack(gcs_uris,fs):
     """Build a z-stack from a list of GCS URIs."""
     lazy_arrays = [
@@ -70,11 +76,16 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
 
 is_zstack = metadata['is_zstack']
+is_tiled = metadata['is_tiled']
 
-if is_zstack:
-    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+if is_tiled:
+    if is_zstack:
+        in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
+    else:
+        in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
 else:
-    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
+    in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"]
+
 
 
 
@@ -84,39 +95,55 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
 
 #TODO: put these in top-level metadata for easier access..
-size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']
-size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']
-size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']
-size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']
-size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
+size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'])
+size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'])
+size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'])
+size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'])
 
 
-#now get the first channel and first zslice tif 
 
-if is_zstack:
-    #prepopulate this since its the same for all tiles, and takes time to query
-    zstack_metadata = get_zstack_metadata('gcs://'+in_tif_pattern.format(tilex=metadata['tiles_x'][0],tiley=metadata['tiles_y'][0],prefix=metadata['prefixes'][0],channel=metadata['channels'][0]),fs=fs)
+if is_tiled:
 
-tiles=[]
-for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
-        
-    zstacks=[]
-    for i_chan,channel in enumerate(metadata['channels']):
-    
-        if is_zstack:            
-            zstacks.append(build_zstack_from_single('gcs://'+in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel),fs=fs,zstack_metadata=zstack_metadata))
-        else:
-            zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))
+    size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
+
+    #now get the first channel and first zslice tif 
+
+
+    tiles=[]
+    for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):
+            
+        print(f'tile {tilex}x{tiley}, {i_tile}') 
+        zstacks=[]
+        for i_chan,channel in enumerate(metadata['channels']):
         
+            print(f'channel {i_chan}')
+            if is_zstack:            
+
+                tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
+                
+                pages=[]
+                #read each page
+                for i_z in range(size_z):
+                    pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))
+                
+                zstacks.append(da.stack(pages))
 
-    #have list of zstack dask arrays for the tile, one for each channel
-    #stack them up and append to list of tiles
-    tiles.append(da.stack(zstacks))
+            else:
+                zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))
+            
 
+        #have list of zstack dask arrays for the tile, one for each channel
+        #stack them up and append to list of tiles
+        tiles.append(da.stack(zstacks))
+
+
+    #now we have list of tiles, each a dask array
+    #stack them up to get our final array
+    darr = da.stack(tiles)
+
+else:
+    print("single tile data not supported for tif_to_zarr_gcs")
 
-#now we have list of tiles, each a dask array
-#stack them up to get our final array
-darr = da.stack(tiles)
 
 #rescale intensities, and recast 
 darr = darr * snakemake.params.intensity_rescaling

From 2ff9fb4f16dc9a271c5f62e6a43576dc16bc4830 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Tue, 1 Oct 2024 13:22:54 -0400
Subject: [PATCH 08/18] fix

---
 workflow/scripts/tif_to_zarr_gcs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index ca9ea11..2840e08 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -32,7 +32,7 @@ def read_tiff_slice(fs,gcs_uri, key=0):
     with fs.open(gcs_uri, 'rb') as file:
         return tifffile.imread(file, key=key)
 
-def read_page_as_numpy(tif_file_uri, page, fs=None):
+def read_page_as_numpy(tif_file_uri, page, fs):
     """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI."""
     
     # Open the file from the cloud storage using fsspec
@@ -124,7 +124,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
                 pages=[]
                 #read each page
                 for i_z in range(size_z):
-                    pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))
+                    pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z,fs),shape=(size_y,size_x),dtype='uint16'))
                 
                 zstacks.append(da.stack(pages))
 

From dbb875593510186f7ba114a3227bdac27f521068 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Tue, 1 Oct 2024 13:53:20 -0400
Subject: [PATCH 09/18] fix

---
 workflow/scripts/tif_to_zarr_gcs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 2840e08..ef454d2 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -7,6 +7,7 @@
 from dask.diagnostics import ProgressBar
 from lib.dask_image import imread_pages
 import gcsfs
+import pyvips
 
 gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project,
                         'token': snakemake.input.creds}

From feb5988da3ff80d59d262cae39f1518b511695bb Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Tue, 1 Oct 2024 15:08:49 -0400
Subject: [PATCH 10/18] reduce threads for tif_to_zarr_gcs

---
 workflow/rules/import.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 3906b36..8a3f320 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -280,7 +280,7 @@ rule tif_to_zarr_gcs:
         ),
     group:
         "preproc"
-    threads: config["cores_per_rule"]
+    threads: 16 #config["cores_per_rule"]
     container:
         config["containers"]["spimprep"]
     script:

From 05498fe535e3c1d3b9274183e903c65e63c4f5f1 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Tue, 1 Oct 2024 21:02:44 -0400
Subject: [PATCH 11/18] back to reading whole zstack tif at once

---
 workflow/rules/import.smk           |  2 +-
 workflow/scripts/tif_to_zarr_gcs.py | 13 ++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 8a3f320..3906b36 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -280,7 +280,7 @@ rule tif_to_zarr_gcs:
         ),
     group:
         "preproc"
-    threads: 16 #config["cores_per_rule"]
+    threads: config["cores_per_rule"]
     container:
         config["containers"]["spimprep"]
     script:
diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index ef454d2..9e69ee9 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -33,8 +33,8 @@ def read_tiff_slice(fs,gcs_uri, key=0):
     with fs.open(gcs_uri, 'rb') as file:
         return tifffile.imread(file, key=key)
 
-def read_page_as_numpy(tif_file_uri, page, fs):
-    """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI."""
+def read_stack_as_numpy(tif_file_uri, fs):
+    """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI."""
     
     # Open the file from the cloud storage using fsspec
     with fs.open(tif_file_uri, 'rb') as f:
@@ -42,7 +42,7 @@ def read_page_as_numpy(tif_file_uri, page, fs):
         file_buffer = f.read()
     
     # Use pyvips to read from the buffer
-    return pyvips.Image.new_from_buffer(file_buffer, "", page=page).numpy()
+    return pyvips.Image.new_from_buffer(file_buffer, "").numpy()
 
 
 
@@ -122,12 +122,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
                 tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
                 
-                pages=[]
-                #read each page
-                for i_z in range(size_z):
-                    pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z,fs),shape=(size_y,size_x),dtype='uint16'))
-                
-                zstacks.append(da.stack(pages))
+                zstacks.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,fs),shape=(size_z,size_y,size_x),dtype='uint16'))
 
             else:
                 zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))

From 8739291d4fecd1065df776133048d4cadc5e88c4 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 11:52:42 -0400
Subject: [PATCH 12/18] hopefully final fix for reading tif zstacks on gcs

downloads temporary file then uses pyvips to read zstack in a delayed
function -- should avoid memory issues
---
 workflow/scripts/tif_to_zarr_gcs.py | 30 +++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 9e69ee9..1981183 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -1,6 +1,9 @@
+import tempfile
 import tifffile
 import json
 import dask.array as da
+import numpy as np
+import dask
 from dask.delayed import delayed
 import dask.array.image 
 from itertools import product
@@ -13,6 +16,7 @@
                         'token': snakemake.input.creds}
 fs = gcsfs.GCSFileSystem(**gcsfs_opts)
 
+dask.config.set(scheduler='synchronous')  # overwrite default with single-threaded scheduler
 
 def replace_square_brackets(pattern):
     """replace all [ and ] in the string (have to use 
@@ -33,16 +37,26 @@ def read_tiff_slice(fs,gcs_uri, key=0):
     with fs.open(gcs_uri, 'rb') as file:
         return tifffile.imread(file, key=key)
 
-def read_stack_as_numpy(tif_file_uri, fs):
+def read_stack_as_numpy(tif_file_uri, fs, Nz,Ny,Nx):
     """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI."""
     
-    # Open the file from the cloud storage using fsspec
-    with fs.open(tif_file_uri, 'rb') as f:
-        # Read the file content into a buffer
-        file_buffer = f.read()
+    #init array
+    vol = np.zeros((Nz,Ny,Nx),dtype='uint16')
+
+    # Create a temporary file with a .tif extension
+    with tempfile.NamedTemporaryFile(suffix=".tif", delete=True) as temp_file:
+        temp_file_path = temp_file.name
+        
+        # Open the remote file and write it to the temporary file
+        with fs.open(tif_file_uri, 'rb') as remote_file:
+            temp_file.write(remote_file.read())
+
+
+        # Use pyvips to read from the file
+        for i in range(Nz):
+            vol[i,:,:] = pyvips.Image.new_from_file(temp_file_path,  page=i).numpy()
     
-    # Use pyvips to read from the buffer
-    return pyvips.Image.new_from_buffer(file_buffer, "").numpy()
+    return vol
 
 
 
@@ -122,7 +136,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
                 tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
                 
-                zstacks.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,fs),shape=(size_z,size_y,size_x),dtype='uint16'))
+                zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16'))
 
             else:
                 zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))

From 3b03fbddcadec0f31c0e1b0f235c39733ce8bbbc Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 12:13:02 -0400
Subject: [PATCH 13/18] remove dask sync (was debugging), and set chunks

---
 workflow/scripts/tif_to_zarr_gcs.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 1981183..899a43e 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -16,7 +16,6 @@
                         'token': snakemake.input.creds}
 fs = gcsfs.GCSFileSystem(**gcsfs_opts)
 
-dask.config.set(scheduler='synchronous')  # overwrite default with single-threaded scheduler
 
 def replace_square_brackets(pattern):
     """replace all [ and ] in the string (have to use 
@@ -136,7 +135,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
                 tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
                 
-                zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16'))
+                zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),chunks=(1,size_y,size_x),dtype='uint16'))
 
             else:
                 zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))

From 6079e6dd4765ad0647da9d2ad11de986891a2251 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 12:35:59 -0400
Subject: [PATCH 14/18] had to rechunk, as setting chunk size in from_delayed
 isn't allowed

---
 workflow/scripts/tif_to_zarr_gcs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 899a43e..c9ea189 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -135,7 +135,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):
 
                 tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)
                 
-                zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),chunks=(1,size_y,size_x),dtype='uint16'))
+                zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16').rechunk((1,size_y,size_x)))
 
             else:
                 zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))

From edea152438192b1fe15ac02c7b1c86aaa2d6e207 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 14:03:19 -0400
Subject: [PATCH 15/18] constrain number of threads

---
 workflow/rules/import.smk           | 2 +-
 workflow/scripts/tif_to_zarr_gcs.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 3906b36..d9db6f6 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -280,7 +280,7 @@ rule tif_to_zarr_gcs:
         ),
     group:
         "preproc"
-    threads: config["cores_per_rule"]
+    threads: 8
     container:
         config["containers"]["spimprep"]
     script:
diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index c9ea189..c738892 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -11,11 +11,13 @@
 from lib.dask_image import imread_pages
 import gcsfs
 import pyvips
+from dask.distributed import Client, LocalCluster
 
 gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project,
                         'token': snakemake.input.creds}
 fs = gcsfs.GCSFileSystem(**gcsfs_opts)
 
+cluster = LocalCluster(n_workers=snakemake.threads, threads_per_worker=1)
 
 def replace_square_brackets(pattern):
     """replace all [ and ] in the string (have to use 

From 7b9196933e815dfdd422e88d8809fd6c55f0d150 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 16:19:20 -0400
Subject: [PATCH 16/18] use threaded instead

---
 workflow/scripts/tif_to_zarr_gcs.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index c738892..41ed921 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -11,13 +11,12 @@
 from lib.dask_image import imread_pages
 import gcsfs
 import pyvips
-from dask.distributed import Client, LocalCluster
 
 gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project,
                         'token': snakemake.input.creds}
 fs = gcsfs.GCSFileSystem(**gcsfs_opts)
 
-cluster = LocalCluster(n_workers=snakemake.threads, threads_per_worker=1)
+dask.config.set(scheduler='threads', num_workers=snakemake.threads)  
 
 def replace_square_brackets(pattern):
     """replace all [ and ] in the string (have to use 

From 5fa43fb0c0466a84e2583ff5fcc69b07da0722f5 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Fri, 4 Oct 2024 21:48:05 -0400
Subject: [PATCH 17/18] skip qc, adjust threads

---
 workflow/Snakefile        | 2 +-
 workflow/rules/import.smk | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index cd19ab3..c997f26 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -33,7 +33,7 @@ rule all:
     input:
         get_all_targets(),
         get_bids_toplevel_targets(),
-        get_qc_targets(),  #need to skip this if using prestitched
+#        get_qc_targets(),  #need to skip this if using prestitched
     localrule: True
 
 
diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index d9db6f6..9fdb401 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -280,7 +280,7 @@ rule tif_to_zarr_gcs:
         ),
     group:
         "preproc"
-    threads: 8
+    threads: 24
     container:
         config["containers"]["spimprep"]
     script:

From c9c4ba91283acfe5d619618f52001eb46ca24ed3 Mon Sep 17 00:00:00 2001
From: Ali Khan <alik@robarts.ca>
Date: Sat, 5 Oct 2024 00:15:03 -0400
Subject: [PATCH 18/18] update threads

---
 workflow/rules/import.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk
index 9fdb401..421ff80 100644
--- a/workflow/rules/import.smk
+++ b/workflow/rules/import.smk
@@ -280,7 +280,7 @@ rule tif_to_zarr_gcs:
         ),
     group:
         "preproc"
-    threads: 24
+    threads: 16
     container:
         config["containers"]["spimprep"]
     script: