-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🐛 Fix file-picker downstream service notification issues #3058
Changes from 12 commits
8f005dc
4c307fa
d6145bc
8d629ad
925b868
4d69aca
e91cd8c
ce92d38
2199ffa
7452360
409498f
fe4a47e
6489e74
b233621
89bb6b2
58b00c7
1d43f53
6a72749
a257419
b9c86db
5e9c03d
ee30e3d
7e35f35
8fd5553
f7f88e9
fbeded1
10ca0e7
d05fe3c
f4fd642
e73c572
8668697
1b7721a
899d3f0
72ac601
b85becb
a0e1311
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
import time | ||
from collections import deque | ||
from pathlib import Path | ||
from typing import Coroutine, Deque, Dict, List, Optional, Set, Tuple, cast | ||
from typing import Any, Coroutine, Deque, Dict, List, Optional, Set, Tuple, cast | ||
|
||
import magic | ||
from pydantic import ByteSize | ||
|
@@ -170,6 +170,76 @@ async def _get_data_from_port(port: Port) -> Tuple[Port, ItemConcreteValue]: | |
return (port, ret) | ||
|
||
|
||
async def _download_files(
    target_path: Path, download_tasks: Deque[Coroutine[Any, int, Any]]
) -> Tuple[dict[str, Any], ByteSize]:
    """Await the download tasks and place retrieved files under ``target_path``.

    Every awaited task is expected to yield a ``(port, value)`` pair. For each
    pair an entry ``{"key": port.key, "value": ...}`` is stored under
    ``port.key`` in the returned mapping.

    For ports whose ``property_type`` contains ``_FILE_TYPE_PREFIX`` the value
    is treated as an optional path to a downloaded file:

    - when there is no file (or it does not exist on disk), the children of
      ``target_path / port.key`` are removed and the entry keeps the raw value;
    - otherwise the file is either unarchived into, or moved into,
      ``target_path / port.key`` and the entry's value becomes that directory
      as a string.

    Returns:
        A tuple ``(data, transferred)`` where ``data`` is the mapping described
        above and ``transferred`` is a ``ByteSize`` summing the on-disk size of
        handled files plus ``sys.getsizeof`` of every non-file value.
    """
    byte_count = 0
    data: dict[str, Any] = {}

    # nothing to download: report an empty result right away
    if not download_tasks:
        return data, ByteSize(byte_count)

    # TODO: limit concurrency to avoid saturating storage+db??
    gathered = await logged_gather(*download_tasks)
    results: List[Tuple[Port, ItemConcreteValue]] = cast(
        List[Tuple[Port, ItemConcreteValue]], gathered
    )
    logger.info("completed download %s", results)

    for port, value in results:
        port_key = port.key
        data[port_key] = {"key": port_key, "value": value}

        if _FILE_TYPE_PREFIX not in port.property_type:
            # plain value: only its (shallow) in-memory size is accounted for
            byte_count += sys.getsizeof(value)
            continue

        # file port: the downloaded file ends up below <target_path>/<port key>
        fetched: Optional[Path] = cast(Optional[Path], value)
        port_dir: Path = target_path / port_key

        if not (fetched and fetched.exists()):
            # the link may be empty:
            # remove all files from disk when the port gets disconnected
            logger.info("removing contents of dir %s", port_dir)
            await remove_directory(port_dir, only_children=True, ignore_errors=True)
            continue

        byte_count += fetched.stat().st_size

        # a valid file is either uncompressed into or moved to the final directory
        logger.info("creating directory %s", port_dir)
        port_dir.mkdir(exist_ok=True, parents=True)
        data[port_key] = {"key": port_key, "value": str(port_dir)}

        # NOTE(review): prune() presumably deletes whatever is in the folder
        # but not listed in `exclude` -- confirm against PrunableFolder
        prunable = PrunableFolder(port_dir)

        if _is_zip_file(fetched):
            # unzip updated data into the port directory
            logger.info("unzipping %s", fetched)
            extracted: Set[Path] = await unarchive_dir(
                archive_to_extract=fetched, destination_folder=port_dir
            )
            prunable.prune(exclude=extracted)
            logger.info("all unzipped in %s", port_dir)
            continue

        logger.info("moving %s", fetched)
        moved_to = port_dir / Path(fetched).name
        await async_on_threadpool(
            # pylint: disable=cell-var-from-loop
            lambda: shutil.move(str(fetched), moved_to)
        )
        prunable.prune(exclude={moved_to})
        logger.info("all moved to %s", moved_to)

    return data, ByteSize(byte_count)
|
||
|
||
@run_sequentially_in_context() | ||
async def download_target_ports( | ||
port_type_name: PortTypeName, target_path: Path, port_keys: List[str] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use list[str]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will decide that if I remove |
||
) -> ByteSize: | ||
|
@@ -183,10 +253,9 @@ async def download_target_ports( | |
node_uuid=str(settings.DY_SIDECAR_NODE_ID), | ||
r_clone_settings=settings.DY_SIDECAR_R_CLONE_SETTINGS, | ||
) | ||
data = {} | ||
|
||
# let's gather all the data | ||
download_tasks = [] | ||
download_tasks: Deque[Coroutine[Any, int, Any]] = deque() | ||
for port_value in (await getattr(PORTS, port_type_name.value)).values(): | ||
# if port_keys contains some keys only download them | ||
logger.info("Checking node %s", port_value.key) | ||
|
@@ -196,61 +265,7 @@ async def download_target_ports( | |
download_tasks.append(_get_data_from_port(port_value)) | ||
logger.info("retrieving %s data", len(download_tasks)) | ||
|
||
transfer_bytes = 0 | ||
if download_tasks: | ||
# TODO: limit concurrency to avoid saturating storage+db?? | ||
results: List[Tuple[Port, ItemConcreteValue]] = cast( | ||
List[Tuple[Port, ItemConcreteValue]], await logged_gather(*download_tasks) | ||
) | ||
logger.info("completed download %s", results) | ||
for port, value in results: | ||
|
||
data[port.key] = {"key": port.key, "value": value} | ||
|
||
if _FILE_TYPE_PREFIX in port.property_type: | ||
|
||
# if there are files, move them to the final destination | ||
downloaded_file: Optional[Path] = cast(Optional[Path], value) | ||
dest_path: Path = target_path / port.key | ||
|
||
if not downloaded_file or not downloaded_file.exists(): | ||
# the link may be empty | ||
# remove files all files from disk when disconnecting port | ||
await remove_directory( | ||
dest_path, only_children=True, ignore_errors=True | ||
) | ||
continue | ||
|
||
transfer_bytes = transfer_bytes + downloaded_file.stat().st_size | ||
|
||
# in case of valid file, it is either uncompressed and/or moved to the final directory | ||
logger.info("creating directory %s", dest_path) | ||
dest_path.mkdir(exist_ok=True, parents=True) | ||
data[port.key] = {"key": port.key, "value": str(dest_path)} | ||
|
||
if _is_zip_file(downloaded_file): | ||
|
||
dest_folder = PrunableFolder(dest_path) | ||
|
||
# unzip updated data to dest_path | ||
logger.info("unzipping %s", downloaded_file) | ||
unarchived: Set[Path] = await unarchive_dir( | ||
archive_to_extract=downloaded_file, destination_folder=dest_path | ||
) | ||
|
||
dest_folder.prune(exclude=unarchived) | ||
|
||
logger.info("all unzipped in %s", dest_path) | ||
else: | ||
logger.info("moving %s", downloaded_file) | ||
dest_path = dest_path / Path(downloaded_file).name | ||
await async_on_threadpool( | ||
# pylint: disable=cell-var-from-loop | ||
lambda: shutil.move(str(downloaded_file), dest_path) | ||
) | ||
logger.info("all moved to %s", dest_path) | ||
else: | ||
transfer_bytes = transfer_bytes + sys.getsizeof(value) | ||
data, transferred_bytes = await _download_files(target_path, download_tasks) | ||
|
||
# create/update the json file with the new values | ||
if data: | ||
|
@@ -261,15 +276,13 @@ async def download_target_ports( | |
data = {**current_data, **data} | ||
data_file.write_text(json.dumps(data)) | ||
|
||
transferred = ByteSize(transfer_bytes) | ||
elapsed_time = time.perf_counter() - start_time | ||
logger.info( | ||
"Downloaded %s in %s seconds", | ||
transferred.human_readable(decimal=True), | ||
transferred_bytes.human_readable(decimal=True), | ||
elapsed_time, | ||
) | ||
|
||
return transferred | ||
return transferred_bytes | ||
|
||
|
||
__all__ = ["dispatch_update_for_directory", "download_target_ports"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
deque
instead of Deque
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OutputsDict
. Will be using that. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
deque
is usable for typing