-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for cloud based processing
- Loading branch information
1 parent
ad50bfb
commit aa93d5a
Showing
16 changed files
with
469 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
from .core import compute_sv, combine, convert, process_raw_file | ||
from .core import initialize, compute_sv, combine, convert, process_raw_file | ||
|
||
__all__ = ["compute_sv", "combine", "convert", "process_raw_file"] | ||
__all__ = [ | ||
"compute_sv", "combine", "convert", "process_raw_file", "initialize" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,3 @@ | ||
import traceback | ||
|
||
import typer | ||
import asyncio | ||
import os | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
typer | ||
rich | ||
typer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +0,0 @@ | ||
pydantic | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from .s3 import process_survey_data_with_progress, list_raw_files_from_bucket, convert_survey_data_from_bucket, \ | ||
download_file_from_bucket | ||
|
||
__all__ = [ | ||
"process_survey_data_with_progress", | ||
"list_raw_files_from_bucket", | ||
"convert_survey_data_from_bucket", | ||
"download_file_from_bucket" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import os | ||
import tempfile | ||
import botocore | ||
import boto3 | ||
from asyncio import CancelledError | ||
from rich import print | ||
from rich.traceback import install, Traceback | ||
from tqdm.auto import tqdm | ||
from dask import delayed | ||
from botocore.config import Config | ||
|
||
|
||
install(show_locals=False, width=120) | ||
|
||
|
||
def process_survey_data_with_progress(files_by_day, bucket_name, client, config_data, process_func):
    """Convert survey raw files from an S3 bucket in parallel with a progress bar.

    Parameters
    ----------
    files_by_day : dict
        Mapping of day label -> list of file records (each having a 'Key').
    bucket_name : str
        Name of the source S3 bucket.
    client : distributed.Client
        Dask client used to submit and gather the conversion tasks.
    config_data : dict
        Configuration forwarded to ``process_func``.
    process_func : callable
        Callback applied to each downloaded raw file.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    progress_bar = None
    temp_dir = tempfile.mkdtemp()
    try:
        total_files = sum(len(files) for files in files_by_day.values())
        progress_bar = tqdm(total=total_files, desc="Processing Files", unit="file", ncols=100)

        def update_progress(*args):
            progress_bar.update()

        futures, temp_dir = convert_survey_data_from_bucket(files_by_day, temp_dir, bucket_name, client,
                                                            config_data, process_func)

        for future in futures:
            future.add_done_callback(update_progress)

        client.gather(futures)  # Ensure all tasks complete
    except (KeyboardInterrupt, CancelledError):
        print("Closing down.")
    except Exception as e:
        print(f"[bold red]An error occurred:[/bold red] {e}")
        print(f"{Traceback()}\n")
    finally:
        # Always restore the terminal and remove scratch space, even on error.
        if progress_bar is not None:
            progress_bar.close()
        # rmtree, not rmdir: _process_raw_file creates per-day subdirectories
        # under temp_dir, so the directory is never empty when we get here.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
||
|
||
def convert_survey_data_from_bucket(files_by_day, temp_dir, bucket_name, dask_client, config_data, process_func):
    """Schedule one Dask conversion task per raw file in the bucket.

    Returns a tuple ``(futures, temp_dir)`` where *futures* are the futures
    produced by ``dask_client.compute`` and *temp_dir* is echoed back to the
    caller for later cleanup.
    """
    # Build the full batch of delayed tasks before submitting anything.
    pending = [
        delayed(_process_raw_file)(entry, day, temp_dir, bucket_name, config_data, process_func)
        for day, day_files in files_by_day.items()
        for entry in day_files
    ]

    # Hand the whole batch to the scheduler in a single call.
    futures = dask_client.compute(pending)

    return futures, temp_dir
|
||
|
||
def list_raw_files_from_bucket(bucket_name, prefix):
    """List ``.raw`` files under *prefix* in the S3 bucket with their metadata.

    Uses an anonymous (unsigned) client, and paginates so that buckets holding
    more than 1000 objects are fully listed — a single ``list_objects_v2``
    call truncates at 1000 keys.

    Returns
    -------
    list[dict]
        One dict per raw file with 'Key', 'Size' and 'LastModified'.
    """
    s3_client = boto3.client('s3', config=Config(signature_version=botocore.UNSIGNED))
    paginator = s3_client.get_paginator('list_objects_v2')

    files = []
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        files.extend(
            {
                'Key': item['Key'],
                'Size': item['Size'],
                'LastModified': item['LastModified'],
            }
            for item in page.get('Contents', [])
            if item['Key'].endswith('.raw')
        )

    return files
|
||
|
||
def download_file_from_bucket(bucket_name, s3_key, local_dir):
    """Fetch one object from S3 into *local_dir* and return its local path."""
    # Unsigned config: the bucket is accessed anonymously, no credentials.
    client = boto3.client('s3', config=Config(signature_version=botocore.UNSIGNED))
    destination = os.path.join(local_dir, os.path.basename(s3_key))
    client.download_file(bucket_name, s3_key, destination)
    return destination
|
||
|
||
def _process_raw_file(file, day, temp_dir, bucket_name, config_data, process_func):
    """Download one raw file, run the conversion callback on it, then delete it."""
    target_dir = os.path.join(temp_dir, day)
    os.makedirs(target_dir, exist_ok=True)

    local_path = download_file_from_bucket(bucket_name, file['Key'], target_dir)
    process_func(local_path, config=config_data, base_path=temp_dir)

    # The converted output is written elsewhere; drop the raw download
    # immediately to keep scratch-space usage bounded.
    os.remove(local_path)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from .blob_storage import list_zarr_files | ||
from .azure_processor import process_survey_data_with_progress, process_survey_data | ||
|
||
__all__ = [ | ||
"list_zarr_files", | ||
"process_survey_data", | ||
"process_survey_data_with_progress" | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import re | ||
|
||
from asyncio import CancelledError | ||
from rich import print | ||
from rich.traceback import install, Traceback | ||
from tqdm.auto import tqdm | ||
from dask import delayed | ||
from .blob_storage import open_zarr_store | ||
|
||
install(show_locals=False, width=120) | ||
|
||
|
||
def process_survey_data_with_progress(files_by_day, azfs, container_name, client, config_data, process_func):
    """Process Zarr survey stores from Azure Blob Storage with a progress bar.

    Parameters
    ----------
    files_by_day : dict
        Mapping of day label -> list of file records (each having a 'Key').
    azfs :
        fsspec-style Azure Blob filesystem handle.
    container_name : str
        Name of the blob container holding the stores.
    client : distributed.Client
        Dask client used to submit and gather the processing tasks.
    config_data : dict
        Configuration forwarded to ``process_func``.
    process_func : callable
        Callback applied to each opened Zarr store.
    """
    progress_bar = None
    try:
        total_files = sum(len(files) for files in files_by_day.values())
        progress_bar = tqdm(total=total_files, desc="Processing Files", unit="file", ncols=100)

        def update_progress(*args):
            progress_bar.update()

        futures = process_survey_data(files_by_day, azfs, container_name, client, config_data, process_func)

        for future in futures:
            future.add_done_callback(update_progress)

        client.gather(futures)  # Ensure all tasks complete
    except (KeyboardInterrupt, CancelledError):
        print("Closing down.")
    except Exception as e:
        print(f"[bold red]An error occurred:[/bold red] {e}")
        print(f"{Traceback()}\n")
    finally:
        # Close in finally so the bar is not left dangling when the run is
        # interrupted or a task raises.
        if progress_bar is not None:
            progress_bar.close()
|
||
|
||
def process_survey_data(files_by_day, azfs, container_name, dask_client, config_data, process_func):
    """Submit one Dask task per Zarr store found in Azure Blob Storage.

    Returns the list of futures produced by ``dask_client.compute``.
    """
    # Collect every delayed task first, then submit the batch as a whole.
    delayed_tasks = [
        delayed(_process_zarr_file)(entry['Key'], azfs, container_name, config_data, process_func)
        for day_files in files_by_day.values()
        for entry in day_files
    ]

    return dask_client.compute(delayed_tasks)
|
||
|
||
def _process_zarr_file(file, azfs, container_name, config_data, process_func=None):
    """Open one Zarr store from Azure Blob Storage and run the processing callback.

    Parameters
    ----------
    file : str
        Blob key of the ``.zarr`` store (may be prefixed with the container name).
    azfs :
        fsspec-style Azure Blob filesystem handle.
    container_name : str
        Container name, stripped from the key to derive the output base path.
    config_data : dict
        Configuration; optional keys 'chunks', 'plot_echogram',
        'waveform_mode' and 'depth_offset' are forwarded to *process_func*.
    process_func : callable
        Callback invoked with the opened store.
    """
    # Derive the output base path: drop the .zarr suffix and any leading
    # "<container_name>/" prefix from the blob key.
    base_path = file.replace('.zarr', '')
    pattern = rf"^{re.escape(container_name)}/"
    base_path = re.sub(pattern, '', base_path)

    # .get instead of [] so a config without 'chunks' falls back to None
    # (unchunked) rather than raising KeyError mid-task — consistent with
    # every other config_data read below.
    chunks = config_data.get('chunks')
    echodata = open_zarr_store(azfs, file, chunks=chunks)

    process_func(echodata, config_data, base_path=base_path,
                 chunks=chunks,
                 plot_echogram=config_data.get('plot_echogram', False),
                 waveform_mode=config_data.get('waveform_mode', "CW"),
                 depth_offset=config_data.get('depth_offset', 0.0))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import echopype as ep | ||
|
||
|
||
def list_zarr_files(azfs, path):
    """Recursively collect Zarr store entries under *path* in Azure Blob Storage.

    Directories that are not themselves ``.zarr`` stores are descended into;
    every ``*.zarr`` entry is reported with its key, size and timestamp.
    """
    found = []
    for entry in azfs.ls(path, detail=True):
        name = entry['name']
        if entry['type'] == 'directory' and not name.endswith('.zarr'):
            # Plain folder: recurse and merge whatever it contains.
            found += list_zarr_files(azfs, name)
        elif name.endswith('.zarr'):
            found.append({
                'Key': name,
                'Size': entry['size'] or 0,
                'LastModified': entry['last_modified'] if 'last_modified' in entry else 0,
            })
    return found
|
||
|
||
def open_zarr_store(azfs, store_name, chunks=None):
    """Open a converted Zarr store from Azure Blob Storage via echopype."""
    # The fsspec mapper gives echopype dict-like access to the remote store.
    store_map = azfs.get_mapper(store_name)
    return ep.open_converted(store_map, chunks=chunks)
|
||
|
||
def _list_zarr_files_extended(azfs, path): | ||
"""List all Zarr files in the Azure Blob Storage container along with their metadata.""" | ||
zarr_files = [] | ||
for blob in azfs.ls(path, detail=True): | ||
if blob['type'] == 'directory' and not blob['name'].endswith('.zarr'): | ||
subdir_files = list_zarr_files(azfs, blob['name']) | ||
zarr_files.extend(subdir_files) | ||
else: | ||
# Calculate the total size and most recent modification date for the .zarr folder | ||
total_size = 0 | ||
last_modified = None | ||
for sub_blob in azfs.ls(blob['name'], detail=True): | ||
if sub_blob['type'] == 'file': | ||
total_size += sub_blob['size'] | ||
if last_modified is None or sub_blob['last_modified'] > last_modified: | ||
last_modified = sub_blob['last_modified'] | ||
|
||
zarr_files.append({ | ||
'Key': blob['name'], | ||
'Size': total_size, | ||
'LastModified': last_modified | ||
}) | ||
|
||
return zarr_files | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.