-
Notifications
You must be signed in to change notification settings - Fork 555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[build_manager] add support for remote zip #4263
Merged
jonathanmetzman
merged 1 commit into
google:master
from
paulsemel:add-support-for-remote-zip
Oct 8, 2024
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,11 +14,13 @@ | |
"""Build manager.""" | ||
|
||
from collections import namedtuple | ||
import contextlib | ||
import os | ||
import re | ||
import shutil | ||
import subprocess | ||
import time | ||
from typing import Optional | ||
|
||
from clusterfuzz._internal.base import errors | ||
from clusterfuzz._internal.base import utils | ||
|
@@ -402,23 +404,19 @@ def _post_setup_success(self, update_revision=True): | |
if instrumented_library_paths: | ||
self._patch_rpaths(instrumented_library_paths) | ||
|
||
def _unpack_build(self, base_build_dir, build_dir, build_url): | ||
"""Unpacks a build from a build url into the build directory.""" | ||
# Track time taken to unpack builds so that it doesn't silently regress. | ||
start_time = time.time() | ||
|
||
logs.info(f'Unpacking build from {build_url} into {build_dir}.') | ||
@contextlib.contextmanager | ||
def _download_and_open_build_archive(self, base_build_dir: str, | ||
build_dir: str, build_url: str): | ||
"""Downloads the build archive at `build_url` and opens it. | ||
|
||
# Free up memory. | ||
utils.python_gc() | ||
|
||
# Remove the current build. | ||
logs.info(f'Removing build directory {build_dir}.') | ||
if not shell.remove_directory(build_dir, recreate=True): | ||
logs.error(f'Unable to clear build directory {build_dir}.') | ||
_handle_unrecoverable_error_on_windows() | ||
return False | ||
Args: | ||
base_build_dir: the base build directory | ||
build_dir: the current build directory | ||
build_url: the build URL | ||
|
||
Yields: | ||
the build archive | ||
""" | ||
# Download build archive locally. | ||
build_local_archive = os.path.join(build_dir, os.path.basename(build_url)) | ||
|
||
|
@@ -431,15 +429,83 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): | |
'Failed to make space for download. ' | ||
'Cleared all data directories to free up space, exiting.') | ||
|
||
logs.info(f'Downloading build from {build_url}.') | ||
logs.info(f'Downloading build from {build_url} to {build_local_archive}.') | ||
try: | ||
storage.copy_file_from(build_url, build_local_archive) | ||
except Exception as e: | ||
logs.error(f'Unable to download build from {build_url}: {e}') | ||
return False | ||
raise | ||
|
||
try: | ||
with build_archive.open(build_local_archive) as build: | ||
yield build | ||
finally: | ||
shell.remove_file(build_local_archive) | ||
|
||
def _open_build_archive(self, base_build_dir: str, build_dir: str, | ||
build_url: str, http_build_url: Optional[str], | ||
unpack_everything: Optional[bool]): | ||
"""Gets a handle on a build archive for the current build. Depending on the | ||
provided parameters, this function might download the build archive into | ||
the build directory or directly use remote HTTP archive. | ||
|
||
Args: | ||
unpack_everything: wether we should unpack the whole archive or try | ||
selective unpacking. | ||
base_build_dir: the base build directory. | ||
build_dir: the current build directory. | ||
build_url: the build URL. | ||
http_build_url: the HTTP build URL. | ||
|
||
Raises: | ||
if an error occurred while accessing the file over HTTP or while | ||
downloading the file on disk. | ||
|
||
Returns: | ||
the build archive. | ||
""" | ||
# We only want to use remote unzipping if we're not unpacking everything and | ||
# if the HTTP URL is compatible with remote unzipping. | ||
allow_unpack_over_http = environment.get_value( | ||
'ALLOW_UNPACK_OVER_HTTP', default_value=False) | ||
can_unzip_over_http = ( | ||
allow_unpack_over_http and not unpack_everything and http_build_url and | ||
build_archive.unzip_over_http_compatible(http_build_url)) | ||
|
||
if not can_unzip_over_http: | ||
return self._download_and_open_build_archive(base_build_dir, build_dir, | ||
build_url) | ||
logs.info("Opening an archive over HTTP, skipping archive download.") | ||
assert http_build_url | ||
return build_archive.open_uri(http_build_url) | ||
|
||
def _unpack_build(self, | ||
base_build_dir, | ||
build_dir, | ||
build_url, | ||
http_build_url=None): | ||
"""Unpacks a build from a build url into the build directory.""" | ||
# Track time taken to unpack builds so that it doesn't silently regress. | ||
start_time = time.time() | ||
|
||
unpack_everything = environment.get_value( | ||
'UNPACK_ALL_FUZZ_TARGETS_AND_FILES') | ||
|
||
logs.info(f'Unpacking build from {build_url} into {build_dir}.') | ||
|
||
# Free up memory. | ||
utils.python_gc() | ||
|
||
# Remove the current build. | ||
logs.info(f'Removing build directory {build_dir}.') | ||
if not shell.remove_directory(build_dir, recreate=True): | ||
logs.error(f'Unable to clear build directory {build_dir}.') | ||
_handle_unrecoverable_error_on_windows() | ||
return False | ||
|
||
try: | ||
with self._open_build_archive(base_build_dir, build_dir, build_url, | ||
http_build_url, unpack_everything) as build: | ||
unpack_everything = environment.get_value( | ||
paulsemel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
'UNPACK_ALL_FUZZ_TARGETS_AND_FILES') | ||
|
||
|
@@ -463,8 +529,7 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): | |
'Cleared all data directories to free up space, exiting.') | ||
|
||
# Unpack the local build archive. | ||
logs.info( | ||
f'Unpacking build archive {build_local_archive} to {build_dir}.') | ||
logs.info(f'Unpacking build archive {build_url} to {build_dir}.') | ||
paulsemel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
trusted = not utils.is_oss_fuzz() | ||
|
||
build.unpack( | ||
|
@@ -473,7 +538,7 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): | |
trusted=trusted) | ||
|
||
except Exception as e: | ||
logs.error(f'Unable to unpack build archive {build_local_archive}: {e}') | ||
logs.error(f'Unable to unpack build archive {build_url}: {e}') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this pass linting? I had the linter recently tell me to use lazy string interpolation for logging calls: logs.error('Unable to unpack build archive %s: %s', build_url, e) Maybe because this uses the |
||
return False | ||
|
||
if unpack_everything: | ||
|
@@ -484,9 +549,6 @@ def _unpack_build(self, base_build_dir, build_dir, build_url): | |
partial_build_file_path = os.path.join(build_dir, PARTIAL_BUILD_FILE) | ||
utils.write_data_to_file('', partial_build_file_path) | ||
|
||
# No point in keeping the archive around. | ||
shell.remove_file(build_local_archive) | ||
|
||
elapsed_time = time.time() - start_time | ||
elapsed_mins = elapsed_time / 60. | ||
log_func = logs.warning if elapsed_time > UNPACK_TIME_LIMIT else logs.info | ||
|
@@ -605,10 +667,20 @@ def __init__(self, | |
revision, | ||
build_url, | ||
build_prefix='', | ||
fuzz_target=None): | ||
fuzz_target=None, | ||
http_build_url=None): | ||
paulsemel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"""RegularBuild constructor. See Build constructor for other parameters. | ||
|
||
Args: | ||
http_build_url: the http build URL. E.g. | ||
http://storage.com/foo/bar.zip. Defaults to None. | ||
build_url: the GCS bucket URL where the build is stored. E.g. | ||
gs://foo/bar.zip. | ||
""" | ||
super().__init__( | ||
base_build_dir, revision, build_prefix, fuzz_target=fuzz_target) | ||
self.build_url = build_url | ||
self.http_build_url = http_build_url | ||
|
||
if build_prefix: | ||
self.build_dir_name = build_prefix.lower() | ||
|
@@ -630,7 +702,7 @@ def setup(self): | |
build_update = not self.exists() | ||
if build_update: | ||
if not self._unpack_build(self.base_build_dir, self.build_dir, | ||
self.build_url): | ||
self.build_url, self.http_build_url): | ||
return False | ||
|
||
logs.info('Retrieved build r%d.' % self.revision) | ||
|
@@ -1116,6 +1188,9 @@ def setup_regular_build(revision, | |
|
||
return None | ||
|
||
# build_url points to a GCP bucket, and we're only converting it to its HTTP | ||
# endpoint so that we can use remote unzipping. | ||
http_build_url = build_url.replace('gs://', 'https://storage.googleapis.com/') | ||
paulsemel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
base_build_dir = _base_build_dir(bucket_path) | ||
|
||
build_class = RegularBuild | ||
|
@@ -1133,7 +1208,8 @@ def setup_regular_build(revision, | |
revision, | ||
build_url, | ||
build_prefix=build_prefix, | ||
fuzz_target=fuzz_target) | ||
fuzz_target=fuzz_target, | ||
http_build_url=http_build_url) | ||
if build.setup(): | ||
result = build | ||
else: | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note for future work: If searching these bytes take a long time, we might want to search for them "in parallel" by pushing the pattern list into
search_bytes_in_file
, which can scan each chunk of the file for all patterns (avoiding the need to re-read the file X times) or even reach for something like Aho-Corasick: https://pyahocorasick.readthedocs.io/en/latest/