Skip to content

Commit

Permalink
S3 (#247)
Browse files Browse the repository at this point in the history
  • Loading branch information
mzouink authored May 3, 2024
2 parents d168877 + ee56cb3 commit 9ee5feb
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 4 deletions.
7 changes: 3 additions & 4 deletions dacapo/store/create_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def create_config_store():
db_name = options.mongo_db_name
return MongoConfigStore(db_host, db_name)
elif options.type == "files":
store_path = Path(options.runs_base_dir).expanduser()
store_path = Path(options.runs_base_dir)
return FileConfigStore(store_path / "configs")
else:
raise ValueError(f"Unknown store type {options.type}")
Expand Down Expand Up @@ -62,7 +62,7 @@ def create_stats_store():
db_name = options.mongo_db_name
return MongoStatsStore(db_host, db_name)
elif options.type == "files":
store_path = Path(options.runs_base_dir).expanduser()
store_path = Path(options.runs_base_dir)
return FileStatsStore(store_path / "stats")
else:
raise ValueError(f"Unknown store type {options.type}")
Expand All @@ -85,8 +85,7 @@ def create_weights_store():

options = Options.instance()

# currently, only the LocalWeightsStore is supported
base_dir = Path(options.runs_base_dir).expanduser()
base_dir = Path(options.runs_base_dir)
return LocalWeightsStore(base_dir)


Expand Down
14 changes: 14 additions & 0 deletions examples/aws/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
You can work locally with S3 data by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables. You can also set the `AWS_REGION` environment variable to specify the region to use. If you are using a profile, set the `AWS_PROFILE` environment variable to select it. Alternatively, you can store your credentials with the AWS CLI:

```bash
aws configure
```

In order to store checkpoints and experiments data in S3, you need to modify `dacapo.yaml` to include the following:

```yaml
runs_base_dir: "s3://dacapotest"
```
Configs and stats can be saved to files (locally or on S3) by setting `type: files`, or to MongoDB by setting `type: mongo`, in the `dacapo.yaml` file.

30 changes: 30 additions & 0 deletions examples/aws/aws_store_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# %%
# Sanity check for the configured DaCapo store (e.g. an S3-backed
# `runs_base_dir`): create the config store and write one task config to it.
#
# The factory is imported explicitly: `import dacapo` on its own does not
# guarantee that the `dacapo.store.create_store` submodule has been loaded and
# bound as an attribute of the package.
from dacapo.store.create_store import create_config_store

config_store = create_config_store()

# %%
from dacapo import Options

options = Options.instance()

# %%
# Bare expression so an interactive/notebook cell displays the active options
# (presumably those read from `dacapo.yaml` - confirm against `Options`).
options
# %%
from dacapo.experiments.tasks import DistanceTaskConfig

task_config = DistanceTaskConfig(
    name="cosem_distance_task_4nm",
    channels=["mito"],
    clip_distance=40.0,
    tol_distance=40.0,
    scale_factor=80.0,
)

# %%
# Hand the task config to the store; with `type: files` this should land under
# `<runs_base_dir>/configs` - verify in the bucket/directory afterwards.
config_store.store_task_config(task_config)

# %%
3 changes: 3 additions & 0 deletions examples/aws/cloud_csv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
train,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/em/fibsem-uint8,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/labels/groundtruth/crop155/[nuc]
train,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/em/fibsem-uint8,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/labels/groundtruth/crop7/[nuc]
val,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/em/fibsem-uint8,s3://janelia-cosem-datasets/jrc_hela-2/jrc_hela-2.zarr,recon-1/labels/groundtruth/crop6/[nuc]
3 changes: 3 additions & 0 deletions examples/aws/dacapo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

runs_base_dir: "s3://dacapotest"
type: "files"
16 changes: 16 additions & 0 deletions examples/aws/s3_datasplit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# %%
# Build a datasplit configuration from the datasets listed in `cloud_csv.csv`
# (the rows there point at S3-hosted zarr containers).
from dacapo.experiments.datasplits import DataSplitGenerator
from funlib.geometry import Coordinate

csv_path = "cloud_csv.csv"  # relative path: run from the directory holding the CSV
input_resolution = Coordinate(8, 8, 8)
output_resolution = Coordinate(4, 4, 4)

generator = DataSplitGenerator.generate_from_csv(
    csv_path, input_resolution, output_resolution
)
datasplit_config = generator.compute()
# %%
# Instantiate the datasplit from its config via the type the config names.
datasplit = datasplit_config.datasplit_type(datasplit_config)
# %%
# Open the datasplit in neuroglancer for visual inspection
# (uses the private `_neuroglancer` helper - NOTE(review): confirm no public API exists).
viewer = datasplit._neuroglancer()
# %%
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ dependencies = [
"click",
"pyyaml",
"scipy",
"upath",
"boto3",
]

# extras
Expand Down

0 comments on commit 9ee5feb

Please sign in to comment.