diff --git a/README.md b/README.md index e5d5b8d..018def7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Takes TIF images (tiled or prestitched) and outputs a validated BIDS Microscopy - Python >= 3.11 - Lightsheet data: - Raw Ultramicroscope Blaze OME TIFF files (include `blaze` in the acquisition tag) + - can be 2D or 3D TIFF files - Prestitched TIFF files (include `prestitched` in the acquisition tag) @@ -63,10 +64,8 @@ or for snakemake<8.0, use: snakemake -c all --use-singularity ``` -Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000` +Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_stitching:mem_mb=60000 bigstitcher_fusion:mem_mb=100000` 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/ - -Alternate usage of this workflow (making use of conda) is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog?repo=khanlab/SPIMprep). diff --git a/config/config.yml b/config/config.yml index edc16ba..9858eb7 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,6 +1,5 @@ datasets: 'config/datasets.tsv' - root: 'bids' # can use a s3:// or gcs:// prefix to write output to cloud storage work: 'work' @@ -13,6 +12,7 @@ cores_per_rule: 32 #import wildcards: tilex, tiley, channel, zslice (and prefix - unused) import_blaze: raw_tif_pattern: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}_xyz-Table Z{zslice}.ome.tif" + raw_tif_pattern_zstack: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}.ome.tif" intensity_rescaling: 0.5 #raw images seem to be at the upper end of uint16 (over-saturated) -- causes wrapping issues when adjusting with flatfield correction etc. this rescales the raw data as it imports it.. import_prestitched: @@ -36,12 +36,10 @@ bigstitcher: downsample_in_x: 4 downsample_in_y: 4 downsample_in_z: 1 - method: "phase_corr" #unused - methods: #unused + methods: #unused, only for reference phase_corr: "Phase Correlation" optical_flow: "Lucas-Kanade" filter_pairwise_shifts: - enabled: 1 #unused min_r: 0.7 max_shift_total: 50 global_optimization: @@ -64,7 +62,7 @@ bigstitcher: block_size_factor_z: 32 ome_zarr: - desc: sparkstitchedflatcorr + desc: stitchedflatcorr max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} rechunk_size: #z, y, x - 1 @@ -155,5 +153,5 @@ report: containers: - spimprep: 'docker://khanlab/spimprep-deps:main' + spimprep: 'docker://khanlab/spimprep-deps:v0.1.0' diff --git a/resources/qc/ff_html_temp.html b/resources/qc/ff_html_temp.html index 658b26d..ba6ee7a 100644 --- a/resources/qc/ff_html_temp.html +++ b/resources/qc/ff_html_temp.html @@ -43,13 +43,13 @@

Chunk - {{ chunk_num }} Channel - {{ loop.index0 }}

{%- for image in channel %} - -

Corrected

+ +

Uncorrected

Slice-{{ image.slice }}

- -

Uncorrected

+ +

Corrected

Slice-{{ image.slice }}

{%- endfor %} @@ -117,4 +117,4 @@

Uncorrected

} }) - \ No newline at end of file + diff --git a/workflow/Snakefile b/workflow/Snakefile index cd19ab3..c997f26 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -33,7 +33,7 @@ rule all: input: get_all_targets(), get_bids_toplevel_targets(), - get_qc_targets(), #need to skip this if using prestitched +# get_qc_targets(), #need to skip this if using prestitched localrule: True diff --git a/workflow/lib/dask_image.py b/workflow/lib/dask_image.py new file mode 100644 index 0000000..dbc06c3 --- /dev/null +++ b/workflow/lib/dask_image.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import os +from glob import glob + +try: + import tifffile +except (AttributeError, ImportError): + pass + +from dask.array.core import Array +from dask.base import tokenize + + +def add_leading_dimension(x): + return x[None, ...] + +def imread(fn, page): + return tifffile.imread(fn,key=page) + + +def imread_pages(filename, preprocess=None): + + + tif = tifffile.TiffFile(filename) + pages = [i for i in range(len(tif.pages))] + name = "imread-%s" % tokenize(filename, os.path.getmtime(filename)) + + sample = tif.pages[0].asarray() + + if preprocess: + sample = preprocess(sample) + + keys = [(name, i) + (0,) * len(sample.shape) for i in pages] + if preprocess: + values = [ + (add_leading_dimension, (preprocess, (imread, filename,i))) for i in pages + ] + else: + values = [(add_leading_dimension, (imread, filename,i)) for i in pages] + dsk = dict(zip(keys, values)) + + chunks = ((1,) * len(pages),) + tuple((d,) for d in sample.shape) + + return Array(dsk, name, chunks, sample.dtype) diff --git a/workflow/macros/AutostitchMacro.ijm b/workflow/macros/AutostitchMacro.ijm deleted file mode 100644 index e9df4df..0000000 --- a/workflow/macros/AutostitchMacro.ijm +++ /dev/null @@ -1,56 +0,0 @@ -args = getArgument() -args = split(args, " "); - -dataset_xml = args[0] -pairwise_method = args[1] -ds_x = args[2] -ds_y = args[3] -ds_z = args[4] -do_filter = args[5] -min_r = args[6] -do_global = args[7] -global_strategy = args[8] - -run("Calculate pairwise shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " method=["+ pairwise_method +"] " + - " channels=[Average Channels] " + - " downsample_in_x=" + ds_x + - " downsample_in_y=" + ds_y + - " downsample_in_z=" + ds_z ); - - -if ( do_filter == 1 ){ - -run("Filter pairwise shifts ...", - "select=" + dataset_xml + - " min_r=" + min_r + - " max_r=1 " + - " max_shift_in_x=0 " + - " max_shift_in_y=0 " + - " max_shift_in_z=0 " + - " max_displacement=0"); -} - -if ( do_global == 1 ){ - -run("Optimize globally and apply shifts ...", - "select=" + dataset_xml + - " process_angle=[All angles] " + - " process_channel=[All channels] " + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " relative=2.500 " + - " absolute=3.500 " + - " global_optimization_strategy=["+global_strategy+"] " + - " fix_group_0-0,"); -} - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/macros/FuseImageMacroZarr.ijm b/workflow/macros/FuseImageMacroZarr.ijm deleted file mode 100644 index 9d49b06..0000000 --- a/workflow/macros/FuseImageMacroZarr.ijm +++ /dev/null @@ -1,47 +0,0 @@ -args = getArgument() -args = split(args, " "); - -in_dataset_xml = args[0] -downsampling = args[1] -channel = args[2] -output_zarr = args[3] -block_size_x = args[4] -block_size_y = args[5] -block_size_z = args[6] -block_size_factor_x = args[7] -block_size_factor_y = args[8] -block_size_factor_z = args[9] - -run("Fuse dataset ...", - "select=" + in_dataset_xml + - " process_angle=[All angles] " + - " process_channel=[Single channel (Select from List)] " + - " processing_channel=[channel " + channel + "]" + - " process_illumination=[All illuminations] " + - " process_tile=[All tiles] " + - " process_timepoint=[All Timepoints] " + - " bounding_box=[Currently Selected Views] " + - " preserve_original " + - " downsampling=" + downsampling + - " interpolation=[Linear Interpolation] " + - " pixel_type=[16-bit unsigned integer] " + - " interest_points_for_non_rigid=[-= Disable Non-Rigid =-] " + - " blend produce=[Each timepoint & channel] " + - " fused_image=[ZARR/N5/HDF5 export using N5-API] " + - " define_input=[Auto-load from input data (values shown below)] " + - " export=ZARR create_0 " + - " zarr_dataset_path=" + output_zarr + - " zarr_base_dataset=/ " + - " zarr_dataset_extension=/s0 " + - " show_advanced_block_size_options " + - " block_size_x=" + block_size_x + - " block_size_y=" + block_size_y + - " block_size_z=" + block_size_z + - " block_size_factor_x=" + block_size_factor_x + - " block_size_factor_y=" + block_size_factor_y + - " block_size_factor_z=" + block_size_factor_z + - " subsampling_factors=[{ {1,1,1}}]"); - - -// quit after we are finished -eval("script", "System.exit(0);"); diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk index 4256e3b..24a4a3e 100644 --- a/workflow/rules/bigstitcher.smk +++ b/workflow/rules/bigstitcher.smk @@ -94,74 +94,7 @@ rule zarr_to_bdv: "../scripts/zarr_to_n5_bdv.py" -rule bigstitcher: - input: - dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.zarr_to_bdv.output.bdv_xml, - ijm=Path(workflow.basedir) / "macros" / "AutostitchMacro.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_bigstitcher, - rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?", - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.sh", - ) - ), - dataset_xml=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcherfiji.xml", - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="bigstitcherfiji", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=10000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - "cp {input.dataset_xml} {output.dataset_xml} && " - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log} && {params.rm_old_xml}" - - -rule bigstitcher_spark_stitching: +rule bigstitcher_stitching: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, dataset_xml=rules.zarr_to_bdv.output.bdv_xml, @@ -230,10 +163,10 @@ rule bigstitcher_spark_stitching: "{params.rm_old_xml}" -rule bigstitcher_spark_solver: +rule bigstitcher_solver: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, - dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml, + dataset_xml=rules.bigstitcher_stitching.output.dataset_xml, params: downsampling="--downsampling={dsx},{dsy},{dsz}".format( dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], @@ -295,7 +228,6 @@ rule bigstitcher_spark_solver: "{params.rm_old_xml}" #lambda 0.1 is default (can expose this if needed) - rule bigstitcher_fusion: input: dataset_n5=bids( @@ -320,98 +252,6 @@ rule bigstitcher_fusion: else "stitching" ), ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", - params: - fiji_launcher_cmd=get_fiji_launcher_cmd, - macro_args=get_macro_args_zarr_fusion, - output: - launcher=temp( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="fuseimagen5.sh", - ) - ), - zarr=temp( - directory( - bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="SPIM.zarr", - ) - ) - ), - benchmark: - bids( - root="benchmarks", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="benchmark.tsv", - ) - log: - bids( - root="logs", - datatype="fuse_dataset", - subject="{subject}", - sample="{sample}", - acq="{acq}", - desc="stitched{desc}", - stain="{stain}", - suffix="log.txt", - ), - container: - config["containers"]["spimprep"] - resources: - runtime=30, - mem_mb=40000, - threads: config["cores_per_rule"] - group: - "preproc" - shell: - " {params.fiji_launcher_cmd} && " - " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " - " && {output.launcher} |& tee {log}" - - -rule bigstitcher_spark_fusion: - input: - dataset_n5=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bdv.n5", - ), - dataset_xml=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - suffix="bigstitcher{}.xml".format( - "solver" - if config["bigstitcher"]["global_optimization"]["enabled"] - else "stitching" - ), - ), - ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", params: channel=lambda wildcards: "--channelId={channel}".format( channel=get_stains(wildcards).index(wildcards.stain) @@ -438,7 +278,7 @@ rule bigstitcher_spark_fusion: datatype="micr", sample="{sample}", acq="{acq}", - desc="sparkstitched{desc}", + desc="stitched{desc}", stain="{stain}", suffix="SPIM.zarr", ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 88d40e7..80a773a 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -257,52 +257,6 @@ def get_stains(wildcards): return df.iloc[0][stain_columns].dropna().tolist() -# bigstitcher -def get_fiji_launcher_cmd(wildcards, output, threads, resources): - launcher_opts_find = "-Xincgc" - launcher_opts_replace = f"-XX:+UseG1GC -verbose:gc -XX:+PrintGCDateStamps -XX:ActiveProcessorCount={threads}" - pipe_cmds = [] - pipe_cmds.append("ImageJ-linux64 --dry-run --headless --console") - pipe_cmds.append(f"sed 's/{launcher_opts_find}/{launcher_opts_replace}'/") - pipe_cmds.append( - rf"sed 's/-Xmx[0-9a-z]\+/-Xmx{resources.mem_mb}m -Xms{resources.mem_mb}m/'" - ) - pipe_cmds.append("tr --delete '\\n'") - return "|".join(pipe_cmds) + f" > {output.launcher} && chmod a+x {output.launcher} " - - -def get_macro_args_bigstitcher(wildcards, input, output): - return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format( - dataset_xml=output.dataset_xml, - pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][ - config["bigstitcher"]["calc_pairwise_shifts"]["method"] - ], - ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], - ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"], - ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"], - do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"], - min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"], - do_global=config["bigstitcher"]["global_optimization"]["enabled"], - global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][ - config["bigstitcher"]["global_optimization"]["strategy"] - ], - ) - - -def get_macro_args_zarr_fusion(wildcards, input, output): - return "{dataset_xml} {downsampling} {channel:02d} {output_zarr} {bsx} {bsy} {bsz} {bsfx} {bsfy} {bsfz}".format( - dataset_xml=input.dataset_xml, - downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"], - channel=get_stains(wildcards).index(wildcards.stain), - output_zarr=output.zarr, - bsx=config["bigstitcher"]["fuse_dataset"]["block_size_x"], - bsy=config["bigstitcher"]["fuse_dataset"]["block_size_y"], - bsz=config["bigstitcher"]["fuse_dataset"]["block_size_z"], - bsfx=config["bigstitcher"]["fuse_dataset"]["block_size_factor_x"], - bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"], - bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"], - ) - def get_output_ome_zarr_uri(): if is_remote(config["root"]): diff --git a/workflow/rules/import.smk b/workflow/rules/import.smk index 8a9074a..421ff80 100644 --- a/workflow/rules/import.smk +++ b/workflow/rules/import.smk @@ -79,11 +79,6 @@ rule blaze_to_metadata_gcs: rule blaze_to_metadata: input: ome_dir=get_input_dataset, - params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), output: metadata_json=temp( bids( @@ -197,10 +192,6 @@ rule tif_to_zarr: ome_dir=get_input_dataset, metadata_json=rules.copy_blaze_metadata.output.metadata_json, params: - in_tif_pattern=lambda wildcards, input: os.path.join( - input.ome_dir, - config["import_blaze"]["raw_tif_pattern"], - ), intensity_rescaling=config["import_blaze"]["intensity_rescaling"], output: zarr=temp( @@ -289,7 +280,7 @@ rule tif_to_zarr_gcs: ), group: "preproc" - threads: config["cores_per_rule"] + threads: 16 container: config["containers"]["spimprep"] script: diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 608158e..25a27e3 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -4,25 +4,67 @@ import re from itertools import product from snakemake.io import glob_wildcards - -in_tif_pattern = snakemake.params.in_tif_pattern +from glob import glob +from pathlib import Path +import os + +tif_files = glob(f"{snakemake.input.ome_dir}/*.tif") + +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tif_files[0]).name: + is_zstack=False +else: + is_zstack=True + +#now check to see if there is just single tile +if ' x ' in Path(tif_files[0]).name: + is_tiled=True +else: + is_tiled=False + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) #add a wildcard constraint to ensure no #subfolders get parsed (ie don't match anything with / in it): prefix_constraint=r'[^/]+' in_tif_pattern_constrained = in_tif_pattern.replace('{prefix}',f'{{prefix,{prefix_constraint}}}') + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) +else: + prefix, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + +if not is_zstack: + zslices = sorted(list(set(zslice))) + +if is_tiled: + if is_zstack: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + else: + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +else: + in_tif = in_tif_pattern.format(prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + raw_tif = tifffile.TiffFile(in_tif,mode='r') @@ -37,57 +79,81 @@ custom_metadata = ome_dict['OME']['Image']['ca:CustomAttributes'] - #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + else: + print('single tile, axes=CZYX') + elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - map_z[key] = float(d['z']) - + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + else: + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels -metadata['zslices'] = zslices + +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/blaze_to_metadata_gcs.py b/workflow/scripts/blaze_to_metadata_gcs.py index 7817d87..b652170 100644 --- a/workflow/scripts/blaze_to_metadata_gcs.py +++ b/workflow/scripts/blaze_to_metadata_gcs.py @@ -5,12 +5,11 @@ import os from itertools import product from snakemake.io import glob_wildcards +from pathlib import Path import gcsfs from lib.cloud_io import get_fsspec dataset_uri = snakemake.params.dataset_path -in_tif_pattern = snakemake.params.in_tif_pattern - gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} @@ -18,18 +17,57 @@ tifs = fs.glob(f"{dataset_uri}/*.tif") +#check first tif file to see if it is zstack or not: +if 'xyz-Table Z' in Path(tifs[0]).name: + is_zstack=False +else: + is_zstack=True + +#now check to see if there is just single tile +if ' x ' in Path(tifs[0]).name: + is_tiled=True +else: + is_tiled=False + + + +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] + + + #parse the filenames to get number of channels, tiles etc.. -prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) +else: + prefix, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) -zslices = sorted(list(set(zslice))) prefixes = sorted(list(set(prefix))) +if not is_zstack: + zslices = sorted(list(set(zslice))) + + +if not is_zstack: + zslices = sorted(list(set(zslice))) -#read in series metadata from first file -in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) +else: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) with fs.open(f"gcs://{in_tif}", 'rb') as tif_file: raw_tif = tifffile.TiffFile(tif_file,mode='r') @@ -49,54 +87,80 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + else: + print('single tile, axes=CZYX') + elif axes == 'ZYX': - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - map_z[key] = float(d['z']) - + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" + else: + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0000" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels -metadata['zslices'] = zslices + +if not is_zstack: + metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' +metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index bfd4a86..034a9cb 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -1,7 +1,11 @@ import tifffile +import os import json +import pyvips import dask.array as da -import dask.array.image +from dask.delayed import delayed +from dask.array.image import imread as imread_tifs +from lib.dask_image import imread_pages from itertools import product from dask.diagnostics import ProgressBar @@ -21,46 +25,97 @@ def single_imread(*args): return tifffile.imread(*args,key=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) +def read_page_as_numpy(tif_file,page): + """gets a single page (i.e. 2d image) from a tif file zstack""" + return pyvips.Image.new_from_file(tif_file, page=page).numpy() #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) + + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) + + -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - +if is_tiled: + + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + + print(f'channel {i_chan}') + + if is_zstack: + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) + print(zstacks[-1].shape) + else: + zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) + +else: + #single tile, zslices: zstacks=[] for i_chan,channel in enumerate(metadata['channels']): - - zstacks.append(dask.array.image.imread(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - + print(f'channel {i_chan}') + zstacks.append(imread_tifs(in_tif_glob.format(prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) + #stack the channels up + darr = da.stack(zstacks) -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) + +print(darr.shape) +print(darr.chunksize) #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling darr = darr.astype('uint16') - #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') with ProgressBar(): diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index 01a9af1..41ed921 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -1,15 +1,22 @@ +import tempfile import tifffile import json import dask.array as da +import numpy as np +import dask +from dask.delayed import delayed import dask.array.image from itertools import product from dask.diagnostics import ProgressBar +from lib.dask_image import imread_pages import gcsfs +import pyvips gcsfs_opts={'project': snakemake.params.storage_provider_settings['gcs'].get_settings().project, 'token': snakemake.input.creds} fs = gcsfs.GCSFileSystem(**gcsfs_opts) +dask.config.set(scheduler='threads', num_workers=snakemake.threads) def replace_square_brackets(pattern): """replace all [ and ] in the string (have to use @@ -20,11 +27,39 @@ def replace_square_brackets(pattern): pattern = pattern.replace('##RIGHTBRACKET##','[]]') return pattern + +def get_tiff_num_pages(fs,gcs_uri): + with fs.open(gcs_uri, 'rb') as file: + return len(tifffile.TiffFile(file).pages) + def read_tiff_slice(fs,gcs_uri, key=0): """Read a single TIFF slice from GCS.""" with fs.open(gcs_uri, 'rb') as file: return tifffile.imread(file, key=key) +def read_stack_as_numpy(tif_file_uri, fs, Nz,Ny,Nx): + """Gets the full stack (i.e., 3D image) from a tif file z-stack stored in a cloud URI.""" + + #init array + vol = np.zeros((Nz,Ny,Nx),dtype='uint16') + + # Create a temporary file with a .tif extension + with tempfile.NamedTemporaryFile(suffix=".tif", delete=True) as temp_file: + temp_file_path = temp_file.name + + # Open the remote file and write it to the temporary file + with fs.open(tif_file_uri, 'rb') as remote_file: + temp_file.write(remote_file.read()) + + + # Use pyvips to read from the file + for i in range(Nz): + vol[i,:,:] = pyvips.Image.new_from_file(temp_file_path, page=i).numpy() + + return vol + + + def build_zstack(gcs_uris,fs): """Build a z-stack from a list of GCS URIs.""" lazy_arrays = [ @@ -37,43 +72,88 @@ def build_zstack(gcs_uris,fs): # Convert the list of delayed objects into a Dask array return da.stack([da.from_delayed(lazy_array, shape=sample_array.shape, dtype=dtype) for lazy_array in lazy_arrays], axis=0) +def build_zstack_from_single(gcs_uri,zstack_metadata,fs): + """Build a z-stack from a single GCS URI """ + pages = [i for i in range(zstack_metadata['n_z'])] + lazy_arrays = [ + dask.delayed(read_tiff_slice)(fs,gcs_uri,page) for page in pages + ] + + # Convert the list of delayed objects into a Dask array + return da.stack([da.from_delayed(lazy_array, shape=zstack_metadata['shape'], dtype=zstack_metadata['dtype']) for lazy_array in lazy_arrays], axis=0) -#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke -in_tif_glob = replace_square_brackets(str(snakemake.params.in_tif_pattern)) #read metadata json with open(snakemake.input.metadata_json) as fp: metadata = json.load(fp) + +is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] + +if is_tiled: + if is_zstack: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"] + else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"] +else: + in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"] + + + + +#use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke +in_tif_glob = replace_square_brackets(str(in_tif_pattern)) + + + #TODO: put these in top-level metadata for easier access.. -size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'] -size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'] -size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'] -size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'] -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) +size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']) +size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) +size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) +size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - - zstacks=[] - for i_chan,channel in enumerate(metadata['channels']): +if is_tiled: + + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + + + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + print(f'channel {i_chan}') + if is_zstack: + + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + zstacks.append(da.from_delayed(delayed(read_stack_as_numpy)('gcs://'+tif_file,fs,size_z,size_y,size_x),shape=(size_z,size_y,size_x),dtype='uint16').rechunk((1,size_y,size_x))) + + else: + zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs)) + + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) +else: + print("single tile data not supported for tif_to_zarr_gcs") -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling