From 5928f7cb0735920a5b0554b3597bb3844e645a02 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 28 Sep 2024 08:53:24 -0400 Subject: [PATCH 01/18] swap uncorr and corr in flatfield corr --- resources/qc/ff_html_temp.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resources/qc/ff_html_temp.html b/resources/qc/ff_html_temp.html index 658b26d..ba6ee7a 100644 --- a/resources/qc/ff_html_temp.html +++ b/resources/qc/ff_html_temp.html @@ -43,13 +43,13 @@

Chunk - {{ chunk_num }} Channel - {{ loop.index0 }}

{%- for image in channel %} - -

Corrected

+ +

Uncorrected

Slice-{{ image.slice }}

- -

Uncorrected

+ +

Corrected

Slice-{{ image.slice }}

{%- endfor %} @@ -117,4 +117,4 @@

Uncorrected

} }) - \ No newline at end of file + From 1ff3be8eb600f4d9c467acc28726fe788188550e Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 28 Sep 2024 09:20:52 -0400 Subject: [PATCH 02/18] remove fiji from workflow --- README.md | 5 +- config/config.yml | 8 +- workflow/macros/AutostitchMacro.ijm | 56 --------- workflow/macros/FuseImageMacroZarr.ijm | 47 ------- workflow/rules/bigstitcher.smk | 168 +------------------------ workflow/rules/common.smk | 46 ------- 6 files changed, 9 insertions(+), 321 deletions(-) delete mode 100644 workflow/macros/AutostitchMacro.ijm delete mode 100644 workflow/macros/FuseImageMacroZarr.ijm diff --git a/README.md b/README.md index e5d5b8d..018def7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy - Python >= 3.11 - Lightsheet data: - Raw Ultramicroscope Blaze OME TIFF files (include `blaze` in the acquisition tag) + - can be 2D or 3D TIFF files - Prestitched TIFF files (include `prestitched` in the acquisition tag) @@ -63,10 +64,8 @@ or for snakemake<8.0, use: snakemake -c all --use-singularity ``` -Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000` +Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_stitching:mem_mb=60000 bigstitcher_fusion:mem_mb=100000` 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/ - -Alternate usage of this workflow (making use of conda) is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog?repo=khanlab/SPIMprep). diff --git a/config/config.yml b/config/config.yml index edc16ba..ae096d2 100644 --- a/config/config.yml +++ b/config/config.yml @@ -36,12 +36,10 @@ bigstitcher: downsample_in_x: 4 downsample_in_y: 4 downsample_in_z: 1 - method: "phase_corr" #unused - methods: #unused + methods: #unused, only for reference phase_corr: "Phase Correlation" optical_flow: "Lucas-Kanade" filter_pairwise_shifts: - enabled: 1 #unused min_r: 0.7 max_shift_total: 50 global_optimization: @@ -64,7 +62,7 @@ bigstitcher: block_size_factor_z: 32 ome_zarr: - desc: sparkstitchedflatcorr + desc: stitchedflatcorr max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} rechunk_size: #z, y, x - 1 @@ -155,5 +153,5 @@ report: containers: - spimprep: 'docker://khanlab/spimprep-deps:main' + spimprep: 'docker://khanlab/spimprep-deps:v0.1.0' diff --git a/workflow/macros/AutostitchMacro.ijm b/workflow/macros/AutostitchMacro.ijm deleted file mode 100644 index e9df4df..0000000 --- a/workflow/macros/AutostitchMacro.ijm +++ /dev/null @@ -1,56 +0,0 @@ -args = getArgument() -args = split(args, " "); - -dataset_xml = args[0] -pairwise_method = args[1] -ds_x = args[2] -ds_y = args[3] -ds_z = args[4] -do_filter = args[5] -min_r = args[6] -do_global = args[7] -global_strategy = args[8] - -run("Calculate pairwise shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " method=["+ pairwise_method +"] " + - " channels=[Average Channels] " + - " downsample_in_x=" + ds_x + - " downsample_in_y=" + ds_y + - " downsample_in_z=" + ds_z ); - - -if ( do_filter == 1 ){ - -run("Filter pairwise shifts ...", - "select=" + dataset_xml + - " min_r=" + min_r + - " max_r=1 " + - " max_shift_in_x=0 " + - " max_shift_in_y=0 " + - " max_shift_in_z=0 " + - " max_displacement=0"); -} - -if ( do_global == 1 ){ - -run("Optimize globally and apply shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " relative=2.500 " + - " absolute=3.500 " + - " global_optimization_strategy=["+global_strategy+"] " + - " fix_group_0-0,"); -} - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/macros/FuseImageMacroZarr.ijm b/workflow/macros/FuseImageMacroZarr.ijm deleted file mode 100644 index 9d49b06..0000000 --- a/workflow/macros/FuseImageMacroZarr.ijm +++ /dev/null @@ -1,47 +0,0 @@ -args = getArgument() -args = split(args, " "); - -in_dataset_xml = args[0] -downsampling = args[1] -channel = args[2] -output_zarr = args[3] -block_size_x = args[4] -block_size_y = args[5] -block_size_z = args[6] -block_size_factor_x = args[7] -block_size_factor_y = args[8] -block_size_factor_z = args[9] - -run("Fuse dataset ...", - "select=" + in_dataset_xml + - " process_angle=[All angles] " + - " process_channel=[Single channel (Select from List)] " + - " processing_channel=[channel " + channel + "]" + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " bounding_box=[Currently Selected Views] " + - " preserve_original " + - " downsampling=" + downsampling + - " interpolation=[Linear Interpolation] " + - " pixel_type=[16-bit unsigned integer] " + - " interest_points_for_non_rigid=[-= Disable Non-Rigid =-] " + - " blend produce=[Each timepoint & channel] " + - " fused_image=[ZARR/N5/HDF5 export using N5-API] " + - " define_input=[Auto-load from input data (values shown below)] " + - " export=ZARR create_0 " + - " zarr_dataset_path=" + output_zarr + - " zarr_base_dataset=/ " + - " zarr_dataset_extension=/s0 " + - " show_advanced_block_size_options " + - " block_size_x=" + block_size_x + - " block_size_y=" + block_size_y + - " block_size_z=" + block_size_z + - " block_size_factor_x=" + block_size_factor_x + - " block_size_factor_y=" + block_size_factor_y + - " block_size_factor_z=" + block_size_factor_z + - " subsampling_factors=[{ {1,1,1}}]"); - - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk index 4256e3b..24a4a3e 100644 --- a/workflow/rules/bigstitcher.smk +++ b/workflow/rules/bigstitcher.smk @@ -94,74 +94,7 @@ rule zarr_to_bdv: "../scripts/zarr_to_n5_bdv.py" -rule bigstitcher: - input: - dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.zarr_to_bdv.output.bdv_xml, - ijm=Path(workflow.basedir) / "macros" / "AutostitchMacro.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_bigstitcher, - rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?", - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.sh", - ) - ), - dataset_xml=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.xml", - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=10000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - "cp {input.dataset_xml} {output.dataset_xml} && " - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log} && {params.rm_old_xml}" - - -rule bigstitcher_spark_stitching: +rule bigstitcher_stitching: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, dataset_xml=rules.zarr_to_bdv.output.bdv_xml, @@ -230,10 +163,10 @@ rule bigstitcher_spark_stitching: "{params.rm_old_xml}" -rule bigstitcher_spark_solver: +rule bigstitcher_solver: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml, + dataset_xml=rules.bigstitcher_stitching.output.dataset_xml, params: downsampling="--downsampling={dsx},{dsy},{dsz}".format( dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], @@ -295,7 +228,6 @@ rule bigstitcher_spark_solver: "{params.rm_old_xml}" #lambda 0.1 is default (can expose this if needed) - rule bigstitcher_fusion: input: dataset_n5=bids( @@ -320,98 +252,6 @@ rule bigstitcher_fusion: else "stitching" ), ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_zarr_fusion, - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="fuseimagen5.sh", - ) - ), - zarr=temp( - directory( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="SPIM.zarr", - ) - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=40000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log}" - - -rule bigstitcher_spark_fusion: - input: - dataset_n5=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bdv.n5", - ), - dataset_xml=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcher{}.xml".format( - "solver" - if config["bigstitcher"]["global_optimization"]["enabled"] - else "stitching" - ), - ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", params: channel=lambda wildcards: "--channelId={channel}".format( channel=get_stains(wildcards).index(wildcards.stain) @@ -438,7 +278,7 @@ rule bigstitcher_spark_fusion: datatype="micr", sample="{sample}", acq="{acq}", - desc="sparkstitched{desc}", + desc="stitched{desc}", stain="{stain}", suffix="SPIM.zarr", ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 88d40e7..80a773a 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -257,52 +257,6 @@ def get_stains(wildcards): return df.iloc[0][stain_columns].dropna().tolist() -# bigstitcher -def get_fiji_launcher_cmd(wildcards, output, threads, resources): - launcher_opts_find = "-Xincgc" - launcher_opts_replace = f"-XX:+UseG1GC -verbose:gc -XX:+PrintGCDateStamps -XX:ActiveProcessorCount={threads}" - pipe_cmds = [] - pipe_cmds.append("ImageJ-linux64 --dry-run --headless --console") - pipe_cmds.append(f"sed 's/{launcher_opts_find}/{launcher_opts_replace}'/") - pipe_cmds.append( - rf"sed 's/-Xmx[0-9a-z]\+/-Xmx{resources.mem_mb}m -Xms{resources.mem_mb}m/'" - ) - pipe_cmds.append("tr --delete '\\n'") - return "|".join(pipe_cmds) + f" > {output.launcher} && chmod a+x {output.launcher} " - - -def get_macro_args_bigstitcher(wildcards, input, output): - return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format( - dataset_xml=output.dataset_xml, - pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][ - config["bigstitcher"]["calc_pairwise_shifts"]["method"] - ], - ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], - ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"], - ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"], - do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"], - min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"], - do_global=config["bigstitcher"]["global_optimization"]["enabled"], - global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][ - config["bigstitcher"]["global_optimization"]["strategy"] - ], - ) - - -def get_macro_args_zarr_fusion(wildcards, input, output): - return "{dataset_xml} {downsampling} {channel:02d} {output_zarr} {bsx} {bsy} {bsz} {bsfx} {bsfy} {bsfz}".format( - dataset_xml=input.dataset_xml, - downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"], - channel=get_stains(wildcards).index(wildcards.stain), - output_zarr=output.zarr, - bsx=config["bigstitcher"]["fuse_dataset"]["block_size_x"], - bsy=config["bigstitcher"]["fuse_dataset"]["block_size_y"], - bsz=config["bigstitcher"]["fuse_dataset"]["block_size_z"], - bsfx=config["bigstitcher"]["fuse_dataset"]["block_size_factor_x"], - bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"], - bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"], - ) - def get_output_ome_zarr_uri(): if is_remote(config["root"]): From 65adb787a9a7dc9d496a5f8e3ead8420b0fb6f94 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Thu, 12 Sep 2024 18:09:43 -0400 Subject: [PATCH 03/18] updates for compatibility with input zstacks Blaze microscope now outputs data with one tif file per zstack. This adds compatibility for it. TODO: test full end-to-end --- config/config.yml | 2 +- workflow/lib/dask_image.py | 45 ++++++++++++++++ workflow/rules/import.smk | 9 ---- workflow/scripts/blaze_to_metadata.py | 64 ++++++++++++++++++----- workflow/scripts/blaze_to_metadata_gcs.py | 59 ++++++++++++++++----- workflow/scripts/tif_to_zarr.py | 40 ++++++++++---- workflow/scripts/tif_to_zarr_gcs.py | 53 +++++++++++++++++-- 7 files changed, 221 insertions(+), 51 deletions(-) create mode 100644 workflow/lib/dask_image.py diff --git a/config/config.yml b/config/config.yml index ae096d2..9858eb7 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,6 +1,5 @@ datasets: 'config/datasets.tsv' - root: 'bids' # can use a s3:// or gcs:// prefix to write output to cloud storage work: 'work' @@ -13,6 +12,7 @@ cores_per_rule: 32 #import wildcards: tilex, tiley, channel, zslice (and prefix - unused) import_blaze: raw_tif_pattern: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}_xyz-Table Z{zslice}.ome.tif" + raw_tif_pattern_zstack: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}.ome.tif" intensity_rescaling: 0.5 #raw images seem to be at the upper end of uint16 (over-saturated) -- causes wrapping issues when adjusting with flatfield correction etc. this rescales the raw data as it imports it.. import_prestitched: diff --git a/workflow/lib/dask_image.py b/workflow/lib/dask_image.py new file mode 100644 index 0000000..dbc06c3 --- /dev/null +++ b/workflow/lib/dask_image.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import os +from glob import glob + +try: + import tifffile +except (AttributeError, ImportError): + pass + +from dask.array.core import Array +from dask.base import tokenize + + +def add_leading_dimension(x): + return x[None, ...] + +def imread(fn, page): + return tifffile.imread(fn,key=page) + + +def imread_pages(filename, preprocess=None): + + + tif = tifffile.TiffFile(filename) + pages = [i for i in range(len(tif.pages))] + name = "imread-%s" % tokenize(filename, os.path.getmtime(filename)) + + sample = tif.pages[0].asarray() + + if preprocess: + sample = preprocess(sample) + + keys = [(name, i) + (0,) * len(sample.shape) for i in pages] + if preprocess: + values = [ + (add_leading_dimension, (preprocess, (imread, filename,i))) for i in pages + ] + else: + values = [(add_leading_dimension, (imread, filename,i)) for i in pages] + dsk = dict(zip(keys, values)) + + chunks = ((1,) * len(pages),) + tuple((d,) for d in sample.shape) + + return Array(dsk, name, chunks, sample.dtype) diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 8a9074a..3906b36 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -79,11 +79,6 @@ rule blaze_to_metadata_gcs: rule blaze_to_metadata: input: ome_dir=get_input_dataset, - params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), output: metadata_json=temp( bids( @@ -197,10 +192,6 @@ rule tif_to_zarr: ome_dir=get_input_dataset, metadata_json=rules.copy_blaze_metadata.output.metadata_json, params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), intensity_rescaling=config["import_blaze"]["intensity_rescaling"], output: zarr=temp( diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 608158e..196dac5 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -4,25 +4,48 @@ import re from itertools import product from snakemake.io import glob_wildcards +from glob import glob +from pathlib import Path +import os -in_tif_pattern = snakemake.params.in_tif_pattern +tif_files = glob(f"{snakemake.input.ome_dir}/*.tif") + +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tif_files[0]).name: + is_zstack=False +else: + is_zstack=True + +if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) #add a wildcard constraint to ensure no #subfolders get parsed (ie don't match anything with / in it): prefix_constraint=r'[^/]+' in_tif_pattern_constrained = in_tif_pattern.replace('{prefix}',f'{{prefix,{prefix_constraint}}}') + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained) +if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) +else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) tiles_x = sorted(list(set(tilex))) tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +if not is_zstack: + zslices = sorted(list(set(zslice))) + + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + +else: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) raw_tif = tifffile.TiffFile(in_tif,mode='r') @@ -37,12 +60,18 @@ custom_metadata = ome_dict['OME']['Image']['ca:CustomAttributes'] - #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + tile_pattern = re.compile(tile_config_pattern) @@ -58,23 +87,29 @@ chunks.append(chunk) for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - d = re.search(tile_pattern,line).groupdict() chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" map_x[key] = float(d['x']) map_y[key] = float(d['y']) - map_z[key] = float(d['z']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) metadata={} metadata['tiles_x'] = tiles_x metadata['tiles_y'] = tiles_y metadata['channels'] = channels -metadata['zslices'] = zslices +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes metadata['chunks'] = chunks metadata['axes'] = axes @@ -88,6 +123,7 @@ metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py index 7817d87..cff5302 100644 --- a/workflow/scripts/blaze_to_metadata_gcs.py +++ b/workflow/scripts/blaze_to_metadata_gcs.py @@ -5,12 +5,11 @@ import os from itertools import product from snakemake.io import glob_wildcards +from pathlib import Path import gcsfs from lib.cloud_io import get_fsspec dataset_uri = snakemake.params.dataset_path -in_tif_pattern = snakemake.params.in_tif_pattern - gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} @@ -18,18 +17,38 @@ tifs = fs.glob(f"{dataset_uri}/*.tif") +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tifs[0]).name: + is_zstack=False +else: + is_zstack=True + +if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] + + + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) +if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs) +else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) + zslices = sorted(list(set(zslice))) tiles_x = sorted(list(set(tilex))) tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) +if not is_zstack: + zslices = sorted(list(set(zslice))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +else: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) with fs.open(f"gcs://{in_tif}", 'rb') as tif_file: raw_tif = tifffile.TiffFile(tif_file,mode='r') @@ -49,9 +68,15 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" tile_pattern = re.compile(tile_config_pattern) @@ -71,19 +96,28 @@ d = re.search(tile_pattern,line).groupdict() chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" map_x[key] = float(d['x']) map_y[key] = float(d['y']) - map_z[key] = float(d['z']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + + metadata={} metadata['tiles_x'] = tiles_x metadata['tiles_y'] = tiles_y metadata['channels'] = channels -metadata['zslices'] = zslices +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes metadata['chunks'] = chunks metadata['axes'] = axes @@ -97,6 +131,7 @@ metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index bfd4a86..d00d461 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -1,7 +1,9 @@ import tifffile +import os import json import dask.array as da -import dask.array.image +from dask.array.image import imread as imread_tifs +from lib.dask_image import imread_pages from itertools import product from dask.diagnostics import ProgressBar @@ -21,14 +23,24 @@ def single_imread(*args): return tifffile.imread(*args,key=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) - #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] + +if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + #TODO: put these in top-level metadata for easier access.. size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] @@ -36,31 +48,39 @@ def single_imread(*args): size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) - #now get the first channel and first zslice tif tiles=[] for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - + print(f'tile {tilex}x{tiley}, {i_tile}') zstacks=[] for i_chan,channel in enumerate(metadata['channels']): - - zstacks.append(dask.array.image.imread(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - + print(f'channel {i_chan}') + + if is_zstack: + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + zstacks.append(imread_pages(tif_file)) + print(zstacks[-1].shape) + else: + zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) #have list of zstack dask arrays for the tile, one for each channel #stack them up and append to list of tiles tiles.append(da.stack(zstacks)) + #now we have list of tiles, each a dask array #stack them up to get our final array darr = da.stack(tiles) +print(darr.shape) +print(darr.chunksize) + #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling darr = darr.astype('uint16') - #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') with ProgressBar(): diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 01a9af1..9c234c2 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -4,6 +4,7 @@ import dask.array.image from itertools import product from dask.diagnostics import ProgressBar +from lib.dask_image import imread_pages import gcsfs gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, @@ -20,6 +21,19 @@ def replace_square_brackets(pattern): pattern = pattern.replace('##RIGHTBRACKET##','[]]') return pattern +def get_zstack_metadata(gcs_uri,fs): + with fs.open(gcs_uri, 'rb') as file: + tif = tifffile.TiffFile(file) + pages = tif.pages + n_z = len(pages) + sample = tif.pages[0].asarray() + return {'n_z': n_z, 'shape': sample.shape, 'dtype': sample.dtype} + + +def get_tiff_num_pages(fs,gcs_uri): + with fs.open(gcs_uri, 'rb') as file: + return len(tifffile.TiffFile(file).pages) + def read_tiff_slice(fs,gcs_uri, key=0): """Read a single TIFF slice from GCS.""" with fs.open(gcs_uri, 'rb') as file: @@ -37,16 +51,38 @@ def build_zstack(gcs_uris,fs): # Convert the list of delayed objects into a Dask array return da.stack([da.from_delayed(lazy_array, shape=sample_array.shape, dtype=dtype) for lazy_array in lazy_arrays], axis=0) +def build_zstack_from_single(gcs_uri,zstack_metadata,fs): + """Build a z-stack from a single GCS URI """ + pages = [i for i in range(zstack_metadata['n_z'])] + lazy_arrays = [ + dask.delayed(read_tiff_slice)(fs,gcs_uri,page) for page in pages + ] + + # Convert the list of delayed objects into a Dask array + return da.stack([da.from_delayed(lazy_array, shape=zstack_metadata['shape'], dtype=zstack_metadata['dtype']) for lazy_array in lazy_arrays], axis=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] + +if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] + + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + + #TODO: put these in top-level metadata for easier access.. size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] @@ -56,14 +92,21 @@ def build_zstack(gcs_uris,fs): #now get the first channel and first zslice tif + +if is_zstack: + #prepopulate this since its the same for all tiles, and takes time to query + zstack_metadata = get_zstack_metadata('gcs://'+in_tif_pattern.format(tilex=metadata['tiles_x'][0],tiley=metadata['tiles_y'][0],prefix=metadata['prefixes'][0],channel=metadata['channels'][0]),fs=fs) + tiles=[] for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): zstacks=[] for i_chan,channel in enumerate(metadata['channels']): - - - zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) + + if is_zstack: + zstacks.append(build_zstack_from_single('gcs://'+in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel),fs=fs,zstack_metadata=zstack_metadata)) + else: + zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) #have list of zstack dask arrays for the tile, one for each channel From 1ebbb745ab97f16d5aee333999f867542c0a54e5 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 28 Sep 2024 07:32:46 -0400 Subject: [PATCH 04/18] use pyvips for tiff reading on zstacks todo: do the same for gcs version --- workflow/scripts/tif_to_zarr.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index d00d461..4d61196 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -1,7 +1,9 @@ import tifffile import os import json +import pyvips import dask.array as da +from dask.delayed import delayed from dask.array.image import imread as imread_tifs from lib.dask_image import imread_pages from itertools import product @@ -23,6 +25,10 @@ def single_imread(*args): return tifffile.imread(*args,key=0) +def read_page_as_numpy(tif_file,page): + """gets a single page (i.e. 2d image) from a tif file zstack""" + return pyvips.Image.new_from_file(tif_file, page=page).numpy() + #read metadata json with open(snakemake.input.metadata_json) as fp: @@ -42,10 +48,10 @@ def single_imread(*args): #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) #now get the first channel and first zslice tif @@ -59,8 +65,13 @@ def single_imread(*args): if is_zstack: tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - - zstacks.append(imread_pages(tif_file)) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) print(zstacks[-1].shape) else: zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) From 78fe03f53e97b227ba4db9f503b90e57c1747b41 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 28 Sep 2024 10:12:12 -0400 Subject: [PATCH 05/18] WIP: compatibility for 1x1 blaze acquisitions (1 tile) - disable fieldfrac? skip stitching? --- workflow/scripts/blaze_to_metadata.py | 142 ++++++++++++++++---------- workflow/scripts/tif_to_zarr.py | 80 ++++++++++----- 2 files changed, 138 insertions(+), 84 deletions(-) diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 196dac5..6eb88cd 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -16,10 +16,19 @@ else: is_zstack=True -if is_zstack: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +#now check to see if there is just single tile +if ' x ' in Path(tif_files[0]).name: + is_tiled=True else: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) + is_tiled=False + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) #add a wildcard constraint to ensure no #subfolders get parsed (ie don't match anything with / in it): @@ -28,24 +37,34 @@ #parse the filenames to get number of channels, tiles etc.. -if is_zstack: - prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) else: - prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + prefix, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) prefixes = sorted(list(set(prefix))) + if not is_zstack: zslices = sorted(list(set(zslice))) - #read in series metadata from first file - in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) - +if is_tiled: + if is_zstack: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + else: + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) else: - in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + in_tif = in_tif_pattern.format(prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + raw_tif = tifffile.TiffFile(in_tif,mode='r') @@ -62,68 +81,79 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + print('single tile, axes=CZYX') + elif axes == 'ZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - if is_zstack: - key = f"tile-{chunk}_chan-{d['channel']}_z-0" - else: - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - if is_zstack: - map_z[key] = float(0) - else: - map_z[key] = float(d['z']) - + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels + if not is_zstack: metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index 4d61196..034a9cb 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -36,11 +36,16 @@ def read_page_as_numpy(tif_file,page): is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] -if is_zstack: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) else: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) + #use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke @@ -52,39 +57,58 @@ def read_page_as_numpy(tif_file,page): size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - print(f'tile {tilex}x{tiley}, {i_tile}') + + + +if is_tiled: + + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + + print(f'channel {i_chan}') + + if is_zstack: + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) + print(zstacks[-1].shape) + else: + zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) + +else: + #single tile, zslices: zstacks=[] for i_chan,channel in enumerate(metadata['channels']): print(f'channel {i_chan}') + zstacks.append(imread_tifs(in_tif_glob.format(prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - if is_zstack: - tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - - pages=[] - #read each page - for i_z in range(size_z): - pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) - - zstacks.append(da.stack(pages)) - print(zstacks[-1].shape) - else: - zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) - + #stack the channels up + darr = da.stack(zstacks) -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) print(darr.shape) print(darr.chunksize) From 4d1b90eeeace555323720d59ba048b85d08bd488 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sun, 29 Sep 2024 16:04:21 -0400 Subject: [PATCH 06/18] fix for tileconfig key zstack 0->0000 --- workflow/scripts/blaze_to_metadata.py | 2 +- workflow/scripts/blaze_to_metadata_gcs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 6eb88cd..25a27e3 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -117,7 +117,7 @@ chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first if is_zstack: - key = f"tile-{chunk}_chan-{d['channel']}_z-0" + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" else: #key is: tile-{chunk}_chan-{channel}_z-{zslice} key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py index cff5302..fac9747 100644 --- a/workflow/scripts/blaze_to_metadata_gcs.py +++ b/workflow/scripts/blaze_to_metadata_gcs.py @@ -97,7 +97,7 @@ chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first if is_zstack: - key = f"tile-{chunk}_chan-{d['channel']}_z-0" + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" else: #key is: tile-{chunk}_chan-{channel}_z-{zslice} key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" From a2ce2186633811c642a2f5272c2f447cf6dfe80c Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 1 Oct 2024 13:05:21 -0400 Subject: [PATCH 07/18] update the tif_to_zarr_gcs for tifzstacks on google cloud --- workflow/scripts/blaze_to_metadata_gcs.py | 139 +++++++++++++--------- workflow/scripts/tif_to_zarr_gcs.py | 99 +++++++++------ 2 files changed, 147 insertions(+), 91 deletions(-) diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py index fac9747..b652170 100644 --- a/workflow/scripts/blaze_to_metadata_gcs.py +++ b/workflow/scripts/blaze_to_metadata_gcs.py @@ -23,25 +23,44 @@ else: is_zstack=True -if is_zstack: - in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] +#now check to see if there is just single tile +if ' x ' in Path(tifs[0]).name: + is_tiled=True else: - in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] + is_tiled=False + + + +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] #parse the filenames to get number of channels, tiles etc.. -if is_zstack: - prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) else: - prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) - zslices = sorted(list(set(zslice))) + prefix, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) prefixes = sorted(list(set(prefix))) +if not is_zstack: + zslices = sorted(list(set(zslice))) + + if not is_zstack: zslices = sorted(list(set(zslice))) @@ -68,70 +87,80 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" - else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" -elif axes == 'ZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" - else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - if is_zstack: - key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" else: - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + print('single tile, axes=CZYX') - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - if is_zstack: - map_z[key] = float(0) +elif axes == 'ZYX': + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" else: - map_z[key] = float(d['z']) - - - + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels + if not is_zstack: metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 9c234c2..ca9ea11 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -1,6 +1,7 @@ import tifffile import json import dask.array as da +from dask.delayed import delayed import dask.array.image from itertools import product from dask.diagnostics import ProgressBar @@ -21,14 +22,6 @@ def replace_square_brackets(pattern): pattern = pattern.replace('##RIGHTBRACKET##','[]]') return pattern -def get_zstack_metadata(gcs_uri,fs): - with fs.open(gcs_uri, 'rb') as file: - tif = tifffile.TiffFile(file) - pages = tif.pages - n_z = len(pages) - sample = tif.pages[0].asarray() - return {'n_z': n_z, 'shape': sample.shape, 'dtype': sample.dtype} - def get_tiff_num_pages(fs,gcs_uri): with fs.open(gcs_uri, 'rb') as file: @@ -39,6 +32,19 @@ def read_tiff_slice(fs,gcs_uri, key=0): with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) +def read_page_as_numpy(tif_file_uri, page, fs=None): + """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI.""" + + # Open the file from the cloud storage using fsspec + with fs.open(tif_file_uri, 'rb') as f: + # Read the file content into a buffer + file_buffer = f.read() + + # Use pyvips to read from the buffer + return pyvips.Image.new_from_buffer(file_buffer, "", page=page).numpy() + + + def build_zstack(gcs_uris,fs): """Build a z-stack from a list of GCS URIs.""" lazy_arrays = [ @@ -70,11 +76,16 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] -if is_zstack: - in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] else: - in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] + @@ -84,39 +95,55 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) -#now get the first channel and first zslice tif -if is_zstack: - #prepopulate this since its the same for all tiles, and takes time to query - zstack_metadata = get_zstack_metadata('gcs://'+in_tif_pattern.format(tilex=metadata['tiles_x'][0],tiley=metadata['tiles_y'][0],prefix=metadata['prefixes'][0],channel=metadata['channels'][0]),fs=fs) +if is_tiled: -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - - zstacks=[] - for i_chan,channel in enumerate(metadata['channels']): - - if is_zstack: - zstacks.append(build_zstack_from_single('gcs://'+in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel),fs=fs,zstack_metadata=zstack_metadata)) - else: - zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + + + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + print(f'channel {i_chan}') + if is_zstack: + + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) + else: + zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) + +else: + print("single tile data not supported for tif_to_zarr_gcs") -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling From 2ff9fb4f16dc9a271c5f62e6a43576dc16bc4830 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 1 Oct 2024 13:22:54 -0400 Subject: [PATCH 08/18] fix --- workflow/scripts/tif_to_zarr_gcs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index ca9ea11..2840e08 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -32,7 +32,7 @@ def read_tiff_slice(fs,gcs_uri, key=0): with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) -def read_page_as_numpy(tif_file_uri, page, fs=None): +def read_page_as_numpy(tif_file_uri, page, fs): """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI.""" # Open the file from the cloud storage using fsspec @@ -124,7 +124,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): pages=[] #read each page for i_z in range(size_z): - pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z,fs),shape=(size_y,size_x),dtype='uint16')) zstacks.append(da.stack(pages)) From dbb875593510186f7ba114a3227bdac27f521068 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 1 Oct 2024 13:53:20 -0400 Subject: [PATCH 09/18] fix --- workflow/scripts/tif_to_zarr_gcs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 2840e08..ef454d2 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -7,6 +7,7 @@ from dask.diagnostics import ProgressBar from lib.dask_image import imread_pages import gcsfs +import pyvips gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} From feb5988da3ff80d59d262cae39f1518b511695bb Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 1 Oct 2024 15:08:49 -0400 Subject: [PATCH 10/18] reduce threads for tif_to_zarr_gcs --- workflow/rules/import.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 3906b36..8a3f320 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -280,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: config["cores_per_rule"] + threads: 16 #config["cores_per_rule"] container: config["containers"]["spimprep"] script: From 05498fe535e3c1d3b9274183e903c65e63c4f5f1 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 1 Oct 2024 21:02:44 -0400 Subject: [PATCH 11/18] back to reading whole zstack tif at once --- workflow/rules/import.smk | 2 +- workflow/scripts/tif_to_zarr_gcs.py | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 8a3f320..3906b36 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -280,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: 16 #config["cores_per_rule"] + threads: config["cores_per_rule"] container: config["containers"]["spimprep"] script: diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index ef454d2..9e69ee9 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -33,8 +33,8 @@ def read_tiff_slice(fs,gcs_uri, key=0): with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) -def read_page_as_numpy(tif_file_uri, page, fs): - """Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI.""" +def read_stack_as_numpy(tif_file_uri, fs): + """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI.""" # Open the file from the cloud storage using fsspec with fs.open(tif_file_uri, 'rb') as f: @@ -42,7 +42,7 @@ def read_page_as_numpy(tif_file_uri, page, fs): file_buffer = f.read() # Use pyvips to read from the buffer - return pyvips.Image.new_from_buffer(file_buffer, "", page=page).numpy() + return pyvips.Image.new_from_buffer(file_buffer, "").numpy() @@ -122,12 +122,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - pages=[] - #read each page - for i_z in range(size_z): - pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z,fs),shape=(size_y,size_x),dtype='uint16')) - - zstacks.append(da.stack(pages)) + zstacks.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,fs),shape=(size_z,size_y,size_x),dtype='uint16')) else: zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) From 8739291d4fecd1065df776133048d4cadc5e88c4 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 11:52:42 -0400 Subject: [PATCH 12/18] hopefully final fix for reading tif zstacks on gcs downloads temporary file then uses pyvips to read zstack in a delayed function -- should avoid memory issues --- workflow/scripts/tif_to_zarr_gcs.py | 30 +++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 9e69ee9..1981183 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -1,6 +1,9 @@ +import tempfile import tifffile import json import dask.array as da +import numpy as np +import dask from dask.delayed import delayed import dask.array.image from itertools import product @@ -13,6 +16,7 @@ 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) +dask.config.set(scheduler='synchronous') # overwrite default with single-threaded scheduler def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use @@ -33,16 +37,26 @@ def read_tiff_slice(fs,gcs_uri, key=0): with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) -def read_stack_as_numpy(tif_file_uri, fs): +def read_stack_as_numpy(tif_file_uri, fs, Nz,Ny,Nx): """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI.""" - # Open the file from the cloud storage using fsspec - with fs.open(tif_file_uri, 'rb') as f: - # Read the file content into a buffer - file_buffer = f.read() + #init array + vol = np.zeros((Nz,Ny,Nx),dtype='uint16') + + # Create a temporary file with a .tif extension + with tempfile.NamedTemporaryFile(suffix=".tif", delete=True) as temp_file: + temp_file_path = temp_file.name + + # Open the remote file and write it to the temporary file + with fs.open(tif_file_uri, 'rb') as remote_file: + temp_file.write(remote_file.read()) + + + # Use pyvips to read from the file + for i in range(Nz): + vol[i,:,:] = pyvips.Image.new_from_file(temp_file_path, page=i).numpy() - # Use pyvips to read from the buffer - return pyvips.Image.new_from_buffer(file_buffer, "").numpy() + return vol @@ -122,7 +136,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - zstacks.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,fs),shape=(size_z,size_y,size_x),dtype='uint16')) + zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16')) else: zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) From 3b03fbddcadec0f31c0e1b0f235c39733ce8bbbc Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 12:13:02 -0400 Subject: [PATCH 13/18] remove dask sync (was debugging), and set chunks --- workflow/scripts/tif_to_zarr_gcs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 1981183..899a43e 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -16,7 +16,6 @@ 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) -dask.config.set(scheduler='synchronous') # overwrite default with single-threaded scheduler def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use @@ -136,7 +135,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16')) + zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),chunks=(1,size_y,size_x),dtype='uint16')) else: zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) From 6079e6dd4765ad0647da9d2ad11de986891a2251 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 12:35:59 -0400 Subject: [PATCH 14/18] had to rechunk, as setting chunk size in from_delayed isn't allowed --- workflow/scripts/tif_to_zarr_gcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 899a43e..c9ea189 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -135,7 +135,7 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs): tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),chunks=(1,size_y,size_x),dtype='uint16')) + zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16').rechunk((1,size_y,size_x))) else: zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) From edea152438192b1fe15ac02c7b1c86aaa2d6e207 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 14:03:19 -0400 Subject: [PATCH 15/18] constrain number of threads --- workflow/rules/import.smk | 2 +- workflow/scripts/tif_to_zarr_gcs.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 3906b36..d9db6f6 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -280,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: config["cores_per_rule"] + threads: 8 container: config["containers"]["spimprep"] script: diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index c9ea189..c738892 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -11,11 +11,13 @@ from lib.dask_image import imread_pages import gcsfs import pyvips +from dask.distributed import Client, LocalCluster gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) +cluster = LocalCluster(n_workers=snakemake.threads, threads_per_worker=1) def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use From 7b9196933e815dfdd422e88d8809fd6c55f0d150 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 16:19:20 -0400 Subject: [PATCH 16/18] use threaded instead --- workflow/scripts/tif_to_zarr_gcs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index c738892..41ed921 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -11,13 +11,12 @@ from lib.dask_image import imread_pages import gcsfs import pyvips -from dask.distributed import Client, LocalCluster gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) -cluster = LocalCluster(n_workers=snakemake.threads, threads_per_worker=1) +dask.config.set(scheduler='threads', num_workers=snakemake.threads) def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use From 5fa43fb0c0466a84e2583ff5fcc69b07da0722f5 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Fri, 4 Oct 2024 21:48:05 -0400 Subject: [PATCH 17/18] skip qc, adjust threads --- workflow/Snakefile | 2 +- workflow/rules/import.smk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index cd19ab3..c997f26 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -33,7 +33,7 @@ rule all: input: get_all_targets(), get_bids_toplevel_targets(), - get_qc_targets(), #need to skip this if using prestitched +# get_qc_targets(), #need to skip this if using prestitched localrule: True diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index d9db6f6..9fdb401 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -280,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: 8 + threads: 24 container: config["containers"]["spimprep"] script: From c9c4ba91283acfe5d619618f52001eb46ca24ed3 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 5 Oct 2024 00:15:03 -0400 Subject: [PATCH 18/18] update threads --- workflow/rules/import.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 9fdb401..421ff80 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -280,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: 24 + threads: 16 container: config["containers"]["spimprep"] script: