Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: removed download_file #121

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 42 additions & 57 deletions src/data_hub/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,48 +239,6 @@ def list_rows(
)


@beartype
@ensure_client
def download_file(
sg-s marked this conversation as resolved.
Show resolved Hide resolved
file_id: str,
*,
destination: str | Path = os.getcwd(),
client=None,
_stash: bool = False,
) -> None:
"""Download a file to a destination folder (workspace).

Download a file synchronously from Deep Origin
to folder on the local file system.

Args:
file_id: ID of the file on Deep Origin
destination: Path to the destination folder

"""

if not os.path.isdir(destination):
raise DeepOriginException(
message=f"Destination `{destination}` should be a path for a folder."
)

file_name = _api.describe_file(
file_id=file_id,
client=client,
_stash=_stash,
).name

url = _api.create_file_download_url(
file_id=file_id,
client=client,
_stash=_stash,
).downloadUrl

save_path = os.path.join(destination, file_name)

download_sync(url, save_path)


@beartype
@ensure_client
def upload_file(
Expand Down Expand Up @@ -929,9 +887,9 @@ def download(
if PREFIXES.FILE in source:
# this is a file

download_file(
file_id=source,
destination=destination,
download_files(
file_ids=[source],
save_to_dir=destination,
client=client,
_stash=_stash,
)
Expand Down Expand Up @@ -1002,10 +960,11 @@ def download_database(

# now download all files in the database
if include_files:
file_ids = df.attrs["file_ids"]

for file_id in file_ids:
download_file(file_id, destination, client=client)
download_files(
file_ids=df.attrs["file_ids"],
save_to_dir=destination,
client=client,
)

df.to_csv(os.path.join(destination, database_hid + ".csv"))

Expand Down Expand Up @@ -1084,6 +1043,7 @@ def get_dataframe(
df = _make_deeporigin_dataframe(
data=data,
reference_ids=None,
file_ids=None,
db_row=db_row,
rows=None,
columns=None,
Expand Down Expand Up @@ -1169,6 +1129,7 @@ def get_dataframe(
df = _make_deeporigin_dataframe(
data=data,
reference_ids=reference_ids,
file_ids=file_ids,
db_row=db_row,
rows=rows,
columns=columns,
Expand All @@ -1192,6 +1153,7 @@ def _make_deeporigin_dataframe(
*,
data: dict,
reference_ids: Optional[list],
file_ids: Optional[list],
db_row: dict,
columns: Optional[list],
rows: Optional[list],
Expand All @@ -1203,6 +1165,12 @@ def _make_deeporigin_dataframe(
df = DataFrame(data)
if reference_ids is not None:
df.attrs["reference_ids"] = list(set(reference_ids))
df.attrs["reference_ids"].sort()

if file_ids is not None:
df.attrs["file_ids"] = list(set(file_ids))
df.attrs["file_ids"].sort()

df.attrs["id"] = db_row.id
df.attrs["metadata"] = dict(db_row)

Expand All @@ -1223,8 +1191,9 @@ def _make_deeporigin_dataframe(
@beartype
@ensure_client
def download_files(
files: Optional[list | dict] = None,
*,
files: Optional[list[dict]] = None,
file_ids: Optional[list[str]] = None,
save_to_dir: Path | str = Path("."),
use_file_names: bool = True,
client=None,
Expand All @@ -1233,21 +1202,37 @@ def download_files(
"""download multiple files in parallel to local disk

Args:
files: list of files to download. These can be of type `types.list_files_response.Data` (as returned by api.list_files) or can be a list of strings of file IDs.
files: list of files to download. These can be a list of file_ids or a list of files as returned by api.list_files
save_to_dir: directory to save files to on local computer
use_file_names: If `True`, refer to files by name rather than ID.
"""

if files is None:
files = list_files(client=client)
if not os.path.isdir(save_to_dir):
raise DeepOriginException(
message=f"Destination `{save_to_dir}` should be a path for a folder."
)

if files is None and file_ids is None:
# nothing provided, download everything
files = list_files(client=client, _stash=_stash)
elif files is not None and file_ids is None:
# list of files provided
pass
elif files is None and file_ids is not None:
# list of file IDs provided
files = list_files(
file_ids=file_ids,
client=client,
_stash=_stash,
)

if isinstance(files, dict):
files = [files]
else:
raise DeepOriginException("Only one of `files` or `file_ids` can be provided")

if isinstance(save_to_dir, str):
save_to_dir = Path(save_to_dir)

if isinstance(files[0], dict):
file_ids = [item.file.id for item in files]
file_ids = [item.file.id for item in files]

if use_file_names:
save_paths = [save_to_dir / item.file.name for item in files]
Expand Down
16 changes: 15 additions & 1 deletion tests/fixtures/responses/convert_id_format.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,30 @@
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
},
{
"id": "_row:rakyaiwKrKsS4JT9IJsXn",
"hid": "exp-1"
},
{
"id": "_row:ku2ZdpZ0VkYgOj1Y37rSi",
"hid": "dna-8"
}
],
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}]}": [
{
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
},
{
"id": "_row:rakyaiwKrKsS4JT9IJsXn",
"hid": "exp-1"
},
{
"id": "_row:ku2ZdpZ0VkYgOj1Y37rSi",
"hid": "dna-8"
}
],
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}]}": [
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}]}": [
{
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
Expand Down
6 changes: 3 additions & 3 deletions tests/fixtures/responses/create_database.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"{\"database\": {\"hid\": \"tc-4Qzkrn57rM-db\", \"hidPrefix\": \"tc-4Qzkrn57rM-db\", \"name\": \"tc-4Qzkrn57rM-db\", \"parentId\": \"registry\"}}": {
"id": "_database:zN9taqV6WxLzjWxKtxwce",
"id": "_database:LanvJgEqhfNVHtgeZgsnT",
"type": "database",
"hid": "tc-4Qzkrn57rM-db",
"name": "tc-4Qzkrn57rM-db",
"dateCreated": "2024-11-19 03:19:28.539577",
"dateUpdated": "2024-11-19 03:19:28.539577",
"dateCreated": "2024-11-25 19:08:42.989402",
"dateUpdated": "2024-11-25 19:08:42.989402",
"createdByUserDrn": "<redacted>",
"parentId": "_workspace:IYTHdIjtuSvnOekwQjUlN",
"hidPrefix": "tc-4Qzkrn57rM-db",
Expand Down
15 changes: 15 additions & 0 deletions tests/fixtures/responses/create_file_upload.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,20 @@
"dateUpdated": "2024-11-19T03:19:28.397Z",
"createdByUserDrn": "<redacted>"
}
},
"{\"checksum_sha256\": \"rRS/5tR0d/MjICI1AdOr7CAyl1JIvgc79n3isXNAf9g=\", \"content_length\": \"10682\", \"content_type\": \"text/x-python\", \"name\": \"test_data_hub.py\"}": {
"uploadUrl": "<redacted>",
"file": {
"name": "test_data_hub.py",
"contentType": "text/x-python",
"contentLength": 10682,
"checksumSha256": "rRS/5tR0d/MjICI1AdOr7CAyl1JIvgc79n3isXNAf9g=",
"id": "_file:ZKPAZZ8H8eKtrXRwZ9CON",
"status": "ready",
"uri": "<redacted>",
"dateCreated": "2024-11-25T19:08:43.221Z",
"dateUpdated": "2024-11-25T19:08:43.221Z",
"createdByUserDrn": "<redacted>"
}
}
}
6 changes: 3 additions & 3 deletions tests/fixtures/responses/create_workspace.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"{\"workspace\": {\"hid\": \"tc-4Qzkrn57rM-ws\", \"name\": \"tc-4Qzkrn57rM-ws\", \"parentId\": null}}": {
"id": "_workspace:WGKwIfOV6ySsJMkbtvRvC",
"id": "_workspace:MptBTTL9qUbbSbUiAydsN",
"type": "workspace",
"hid": "tc-4Qzkrn57rM-ws",
"name": "tc-4Qzkrn57rM-ws",
"dateCreated": "2024-11-19 03:19:28.75955",
"dateUpdated": "2024-11-19 03:19:28.75955",
"dateCreated": "2024-11-25 19:08:43.22463",
"dateUpdated": "2024-11-25 19:08:43.22463",
"createdByUserDrn": "<redacted>"
}
}
2 changes: 1 addition & 1 deletion tests/fixtures/responses/describe_database_stats.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"{\"database_id\": \"kitchen-sink\"}": {
"rowCount": 127
"rowCount": 0
sg-s marked this conversation as resolved.
Show resolved Hide resolved
}
}
2 changes: 2 additions & 0 deletions tests/fixtures/responses/describe_row.json
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@
]
},
"{\"fields\": true, \"row_id\": \"dfsd-306\"}": {
"parentHid": "ligands",
"type": "row",
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306",
Expand Down Expand Up @@ -729,6 +730,7 @@
]
},
"{\"fields\": false, \"row_id\": \"dfsd-306\"}": {
"parentHid": "ligands",
"type": "row",
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306",
Expand Down
9 changes: 5 additions & 4 deletions tests/fixtures/responses/ensure_rows.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
"{\"database_id\": \"kitchen-sink\", \"rows\": [{\"row\": {}}]}": {
"rows": [
{
"parentHid": "kitchen-sink",
"type": "row",
"id": "_row:OIy39TGcV7jqLbCHsdhTO",
"hid": "ks-136",
"id": "_row:_database:swAKhqQzxqTB7cm2FeVcW:iqZurZ4W4UzDwpRgo5Vuj",
"hid": "ks-155",
"parentId": "_database:swAKhqQzxqTB7cm2FeVcW",
"dateCreated": "2024-11-19 03:19:29.007535",
"dateUpdated": "2024-11-19 03:19:29.007535",
"dateCreated": "2024-11-25 19:08:43.711226",
"dateUpdated": "2024-11-25 19:08:43.711226",
"createdByUserDrn": "<redacted>",
"validationStatus": "valid"
}
Expand Down
Loading