Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Temp file fixes #4256

Merged
merged 13 commits into from
May 19, 2023
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ No changes to highlight.

## Bug Fixes:

- Fixed Gallery/AnnotatedImage components not respecting GRADIO_DEFAULT_DIR variable by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4256](https://github.com/gradio-app/gradio/pull/4256)
- Fixed Gallery/AnnotatedImage components resaving identical images by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4256](https://github.com/gradio-app/gradio/pull/4256)
- Fixed Audio/Video/File components creating empty tempfiles on each run by [@freddyaboulton](https://github.com/freddyaboulton) in [PR 4256](https://github.com/gradio-app/gradio/pull/4256)
- Fixed the behavior of the `run_on_click` parameter in `gr.Examples` by [@abidlabs](https://github.com/abidlabs) in [PR 4258](https://github.com/gradio-app/gradio/pull/4258).
- Ensure js client respects the full root when making requests to the server by [@pngwn](https://github.com/pngwn) in [PR 4271](https://github.com/gradio-app/gradio/pull/4271)

Expand Down
4 changes: 2 additions & 2 deletions client/python/gradio_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,8 +785,8 @@ def serialize(self, *data) -> tuple:
if t in ["file", "uploadbutton"]
]
uploaded_files = self._upload(files)
self._add_uploaded_files_to_data(uploaded_files, list(data))

data = list(data)
abidlabs marked this conversation as resolved.
Show resolved Hide resolved
self._add_uploaded_files_to_data(uploaded_files, data)
o = tuple([s.serialize(d) for s, d in zip(self.serializers, data)])
return o

Expand Down
14 changes: 10 additions & 4 deletions client/python/test/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,19 @@ def test_upload_file_private_space(self):
with patch.object(
client.endpoints[0], "_upload", wraps=client.endpoints[0]._upload
) as upload:
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
f.write("Hello from private space!")

output = client.submit(1, "foo", f.name, api_name="/file_upload").result()
with patch.object(
client.endpoints[0], "serialize", wraps=client.endpoints[0].serialize
) as serialize:
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
f.write("Hello from private space!")

output = client.submit(
1, "foo", f.name, api_name="/file_upload"
).result()
with open(output) as f:
assert f.read() == "Hello from private space!"
upload.assert_called_once()
assert all(f["is_file"] for f in serialize.return_value())

with patch.object(
client.endpoints[1], "_upload", wraps=client.endpoints[0]._upload
Expand Down
162 changes: 106 additions & 56 deletions gradio/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from enum import Enum
from pathlib import Path
from types import ModuleType
from typing import TYPE_CHECKING, Any, Callable, Dict, cast
from typing import TYPE_CHECKING, Any, Callable, Dict

import aiofiles
import altair as alt
Expand Down Expand Up @@ -217,14 +217,16 @@ def __init__(
if callable(load_fn):
self.attach_load_event(load_fn, every)

def hash_file(self, file_path: str, chunk_num_blocks: int = 128) -> str:
@staticmethod
def hash_file(file_path: str, chunk_num_blocks: int = 128) -> str:
sha1 = hashlib.sha1()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(chunk_num_blocks * sha1.block_size), b""):
sha1.update(chunk)
return sha1.hexdigest()

def hash_url(self, url: str, chunk_num_blocks: int = 128) -> str:
@staticmethod
def hash_url(url: str, chunk_num_blocks: int = 128) -> str:
sha1 = hashlib.sha1()
remote = urllib.request.urlopen(url)
max_file_size = 100 * 1024 * 1024 # 100MB
Expand All @@ -237,7 +239,14 @@ def hash_url(self, url: str, chunk_num_blocks: int = 128) -> str:
sha1.update(data)
return sha1.hexdigest()

def hash_base64(self, base64_encoding: str, chunk_num_blocks: int = 128) -> str:
@staticmethod
def hash_bytes(bytes: bytes):
sha1 = hashlib.sha1()
sha1.update(bytes)
return sha1.hexdigest()

@staticmethod
def hash_base64(base64_encoding: str, chunk_num_blocks: int = 128) -> str:
sha1 = hashlib.sha1()
for i in range(0, len(base64_encoding), chunk_num_blocks * sha1.block_size):
data = base64_encoding[i : i + chunk_num_blocks * sha1.block_size]
Expand All @@ -251,9 +260,8 @@ def make_temp_copy_if_needed(self, file_path: str) -> str:
temp_dir = Path(self.DEFAULT_TEMP_DIR) / temp_dir
temp_dir.mkdir(exist_ok=True, parents=True)

f = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
f.name = client_utils.strip_invalid_filename_characters(Path(file_path).name)
full_temp_file_path = str(utils.abspath(temp_dir / f.name))
name = client_utils.strip_invalid_filename_characters(Path(file_path).name)
full_temp_file_path = str(utils.abspath(temp_dir / name))

if not Path(full_temp_file_path).exists():
shutil.copy2(file_path, full_temp_file_path)
Expand All @@ -267,15 +275,14 @@ async def save_uploaded_file(self, file: UploadFile, upload_dir: str) -> str:
) # Since the full file is being uploaded anyways, there is no benefit to hashing the file.
temp_dir = Path(upload_dir) / temp_dir
temp_dir.mkdir(exist_ok=True, parents=True)
output_file_obj = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)

if file.filename:
file_name = Path(file.filename).name
output_file_obj.name = client_utils.strip_invalid_filename_characters(
file_name
)
name = client_utils.strip_invalid_filename_characters(file_name)
else:
name = f"tmp{secrets.token_hex(5)}"

full_temp_file_path = str(utils.abspath(temp_dir / output_file_obj.name))
full_temp_file_path = str(utils.abspath(temp_dir / name))

async with aiofiles.open(full_temp_file_path, "wb") as output_file:
while True:
Expand All @@ -292,10 +299,9 @@ def download_temp_copy_if_needed(self, url: str) -> str:
temp_dir = self.hash_url(url)
temp_dir = Path(self.DEFAULT_TEMP_DIR) / temp_dir
temp_dir.mkdir(exist_ok=True, parents=True)
f = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)

f.name = client_utils.strip_invalid_filename_characters(Path(url).name)
full_temp_file_path = str(utils.abspath(temp_dir / f.name))
name = client_utils.strip_invalid_filename_characters(Path(url).name)
full_temp_file_path = str(utils.abspath(temp_dir / name))

if not Path(full_temp_file_path).exists():
with requests.get(url, stream=True) as r, open(
Expand Down Expand Up @@ -323,8 +329,7 @@ def base64_to_temp_file_if_needed(
file_name = f"file.{guess_extension}"
else:
file_name = "file"
f = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
f.name = file_name # type: ignore

full_temp_file_path = str(utils.abspath(temp_dir / file_name)) # type: ignore

if not Path(full_temp_file_path).exists():
Expand All @@ -335,6 +340,36 @@ def base64_to_temp_file_if_needed(
self.temp_files.add(full_temp_file_path)
return full_temp_file_path

def pil_to_temp_file(self, img: _Image.Image, dir: str, format="png") -> str:
    """Save ``img`` under a content-addressed subdirectory of ``dir``.

    The image is first encoded with ``processing_utils.encode_pil_to_bytes``;
    the SHA-1 of those bytes names the subdirectory, so identical encodings
    land on the same path. The file itself is always named
    ``image.<format>`` and is (re)written on every call.

    Returns the path of the saved file as a string.
    """
    encoded = processing_utils.encode_pil_to_bytes(img, format)
    target_dir = Path(dir) / self.hash_bytes(encoded)
    target_dir.mkdir(parents=True, exist_ok=True)
    out_path = str(target_dir / f"image.{format}")
    # The image is saved from the PIL object again (with its metadata)
    # rather than by dumping the bytes that were hashed above.
    img.save(out_path, pnginfo=processing_utils.get_pil_metadata(img))
    return out_path

def img_array_to_temp_file(self, arr: np.ndarray, dir: str) -> str:
    """Convert ``arr`` to a uint8 PIL image and save it as a PNG under ``dir``.

    Delegates the actual write to ``pil_to_temp_file`` and returns the
    path it produces.
    """
    as_uint8 = processing_utils._convert(arr, np.uint8, force_copy=False)
    image = _Image.fromarray(as_uint8)
    return self.pil_to_temp_file(image, dir, format="png")

def audio_to_temp_file(
    self, data: np.ndarray, sample_rate: int, dir: str, format: str
):
    """Write audio samples to ``<dir>/<sha1 of samples>/audio.<format>``.

    The subdirectory name is the SHA-1 of ``data.tobytes()``, so the same
    samples always map to the same location. The file is (re)written on
    every call via ``processing_utils.audio_to_file``.

    Returns the path of the written file as a string.
    """
    # NOTE(review): only the raw samples are hashed; ``sample_rate`` is not
    # part of the directory name, so equal samples at different rates share
    # one path -- confirm this is intended.
    digest = self.hash_bytes(data.tobytes())
    target_dir = Path(dir) / digest
    target_dir.mkdir(parents=True, exist_ok=True)
    out_path = str(target_dir / f"audio.{format}")
    processing_utils.audio_to_file(sample_rate, data, out_path, format=format)
    return out_path

def file_bytes_to_file(self, data: bytes, dir: str, file_name: str):
    """Write ``data`` to ``<dir>/<sha1 of data>/<basename of file_name>``.

    Only the final component of ``file_name`` is used, so any directory
    parts it carries are dropped. The file is (re)written on every call.

    Returns the destination as a ``Path`` object (not a string).
    """
    target_dir = Path(dir) / self.hash_bytes(data)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir / Path(file_name).name
    destination.write_bytes(data)
    return destination

def get_config(self):
config = {
"label": self.label,
Expand Down Expand Up @@ -1758,12 +1793,11 @@ def _format_image(
elif self.type == "numpy":
return np.array(im)
elif self.type == "filepath":
file_obj = tempfile.NamedTemporaryFile(
delete=False,
suffix=(f".{fmt.lower()}" if fmt is not None else ".png"),
path = self.pil_to_temp_file(
im, dir=self.DEFAULT_TEMP_DIR, format=fmt or "png"
)
im.save(file_obj.name)
return self.make_temp_copy_if_needed(file_obj.name)
self.temp_files.add(path)
return path
else:
raise ValueError(
"Unknown type: "
Expand Down Expand Up @@ -2259,8 +2293,7 @@ def srt_to_vtt(srt_file_path, vtt_file_path):
# HTML5 only support vtt format
if Path(subtitle).suffix == ".srt":
temp_file = tempfile.NamedTemporaryFile(
delete=False,
suffix=".vtt",
delete=False, suffix=".vtt", dir=self.DEFAULT_TEMP_DIR
)

srt_to_vtt(subtitle, temp_file.name)
Expand Down Expand Up @@ -2483,7 +2516,9 @@ def tokenize(self, x):
# Handle the leave one outs
leave_one_out_data = np.copy(data)
leave_one_out_data[start:stop] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
file = tempfile.NamedTemporaryFile(
delete=False, suffix=".wav", dir=self.DEFAULT_TEMP_DIR
)
processing_utils.audio_to_file(sample_rate, leave_one_out_data, file.name)
out_data = client_utils.encode_file_to_base64(file.name)
leave_one_out_sets.append(out_data)
Expand All @@ -2494,7 +2529,9 @@ def tokenize(self, x):
token = np.copy(data)
token[0:start] = 0
token[stop:] = 0
file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
file = tempfile.NamedTemporaryFile(
delete=False, suffix=".wav", dir=self.DEFAULT_TEMP_DIR
)
processing_utils.audio_to_file(sample_rate, token, file.name)
token_data = client_utils.encode_file_to_base64(file.name)
file.close()
Expand Down Expand Up @@ -2525,7 +2562,7 @@ def get_masked_inputs(self, tokens, binary_mask_matrix):
masked_input = np.copy(zero_input)
for t, b in zip(token_data, binary_mask_vector):
masked_input = masked_input + t * int(b)
file = tempfile.NamedTemporaryFile(delete=False)
file = tempfile.NamedTemporaryFile(delete=False, dir=self.DEFAULT_TEMP_DIR)
processing_utils.audio_to_file(sample_rate, masked_input, file.name)
masked_data = client_utils.encode_file_to_base64(file.name)
file.close()
Expand All @@ -2546,11 +2583,9 @@ def postprocess(self, y: tuple[int, np.ndarray] | str | None) -> str | dict | No
return {"name": y, "data": None, "is_file": True}
if isinstance(y, tuple):
sample_rate, data = y
file = tempfile.NamedTemporaryFile(suffix=f".{self.format}", delete=False)
processing_utils.audio_to_file(
sample_rate, data, file.name, format=self.format
file_path = self.audio_to_temp_file(
data, sample_rate, dir=self.DEFAULT_TEMP_DIR, format=self.format
)
file_path = str(utils.abspath(file.name))
self.temp_files.add(file_path)
else:
file_path = self.make_temp_copy_if_needed(y)
Expand Down Expand Up @@ -2720,14 +2755,21 @@ def process_single_file(f) -> bytes | tempfile._TemporaryFileWrapper:
)
if self.type == "file":
if is_file:
temp_file_path = self.make_temp_copy_if_needed(file_name)
file = tempfile.NamedTemporaryFile(delete=False)
file.name = temp_file_path
file.orig_name = file_name # type: ignore
path = self.make_temp_copy_if_needed(file_name)
else:
file = client_utils.decode_base64_to_file(data, file_path=file_name)
file.orig_name = file_name # type: ignore
self.temp_files.add(str(utils.abspath(file.name)))
data, _ = client_utils.decode_base64_to_binary(data)
path = self.file_bytes_to_file(
data, dir=self.DEFAULT_TEMP_DIR, file_name=file_name
)
path = str(utils.abspath(path))
self.temp_files.add(path)

# Creation of tempfiles here
file = tempfile.NamedTemporaryFile(
abidlabs marked this conversation as resolved.
Show resolved Hide resolved
delete=False, dir=self.DEFAULT_TEMP_DIR
)
file.name = path
file.orig_name = file_name # type: ignore
return file
elif (
self.type == "binary" or self.type == "bytes"
Expand Down Expand Up @@ -2777,13 +2819,14 @@ def postprocess(
for file in y
]
else:
return {
d = {
"orig_name": Path(y).name,
"name": self.make_temp_copy_if_needed(y),
"size": Path(y).stat().st_size,
"data": None,
"is_file": True,
}
return d

def style(
self,
Expand Down Expand Up @@ -3472,14 +3515,19 @@ def process_single_file(f) -> bytes | tempfile._TemporaryFileWrapper:
)
if self.type == "file":
if is_file:
temp_file_path = self.make_temp_copy_if_needed(file_name)
file = tempfile.NamedTemporaryFile(delete=False)
file.name = temp_file_path
file.orig_name = file_name # type: ignore
path = self.make_temp_copy_if_needed(file_name)
else:
file = client_utils.decode_base64_to_file(data, file_path=file_name)
file.orig_name = file_name # type: ignore
self.temp_files.add(str(utils.abspath(file.name)))
data, _ = client_utils.decode_base64_to_binary(data)
path = self.file_bytes_to_file(
data, dir=self.DEFAULT_TEMP_DIR, file_name=file_name
)
path = str(utils.abspath(path))
self.temp_files.add(path)
file = tempfile.NamedTemporaryFile(
delete=False, dir=self.DEFAULT_TEMP_DIR
)
file.name = path
file.orig_name = file_name # type: ignore
return file
elif self.type == "bytes":
if is_file:
Expand Down Expand Up @@ -4068,11 +4116,11 @@ def postprocess(
base_img_path = base_img
base_img = np.array(_Image.open(base_img))
elif isinstance(base_img, np.ndarray):
base_file = processing_utils.save_array_to_file(base_img)
freddyaboulton marked this conversation as resolved.
Show resolved Hide resolved
base_img_path = str(utils.abspath(base_file.name))
base_file = self.img_array_to_temp_file(base_img, dir=self.DEFAULT_TEMP_DIR)
base_img_path = str(utils.abspath(base_file))
elif isinstance(base_img, _Image.Image):
base_file = processing_utils.save_pil_to_file(base_img)
freddyaboulton marked this conversation as resolved.
Show resolved Hide resolved
base_img_path = str(utils.abspath(base_file.name))
base_file = self.pil_to_temp_file(base_img, dir=self.DEFAULT_TEMP_DIR)
base_img_path = str(utils.abspath(base_file))
base_img = np.array(base_img)
else:
raise ValueError(
Expand Down Expand Up @@ -4116,8 +4164,10 @@ def hex_to_rgb(value):

colored_mask_img = _Image.fromarray((colored_mask).astype(np.uint8))

mask_file = processing_utils.save_pil_to_file(colored_mask_img)
mask_file_path = str(utils.abspath(mask_file.name))
mask_file = self.pil_to_temp_file(
colored_mask_img, dir=self.DEFAULT_TEMP_DIR
)
mask_file_path = str(utils.abspath(mask_file))
self.temp_files.add(mask_file_path)

sections.append(
Expand Down Expand Up @@ -4404,12 +4454,12 @@ def postprocess(
if isinstance(img, (tuple, list)):
img, caption = img
if isinstance(img, np.ndarray):
file = processing_utils.save_array_to_file(img)
file_path = str(utils.abspath(file.name))
file = self.img_array_to_temp_file(img, dir=self.DEFAULT_TEMP_DIR)
file_path = str(utils.abspath(file))
self.temp_files.add(file_path)
elif isinstance(img, _Image.Image):
file = processing_utils.save_pil_to_file(img)
file_path = str(utils.abspath(file.name))
file = self.pil_to_temp_file(img, dir=self.DEFAULT_TEMP_DIR)
file_path = str(utils.abspath(file))
self.temp_files.add(file_path)
elif isinstance(img, str):
if utils.validate_url(img):
Expand Down
Loading