diff --git a/CHANGELOG.md b/CHANGELOG.md index c84e67e8e..6670f8ef4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +* Add ability to upload from an unbound source such as standard input or a named pipe + +### Deprecated +* Support of `-` as a valid filename in `upload-file` command. In future `-` will be an alias for standard input. * Declare official support of Python 3.12 * Cache-Control option when uploading files diff --git a/b2/_cli/argcompleters.py b/b2/_cli/argcompleters.py index 5d6028e96..a4510a35e 100644 --- a/b2/_cli/argcompleters.py +++ b/b2/_cli/argcompleters.py @@ -10,7 +10,7 @@ from functools import wraps from itertools import islice -from b2sdk.api import B2Api +from b2sdk.v2.api import B2Api from b2._cli.b2api import _get_b2api_for_profile from b2._cli.const import LIST_FILE_NAMES_MAX_LIMIT diff --git a/b2/_cli/b2api.py b/b2/_cli/b2api.py index 5c0e43d17..5cda2b6d3 100644 --- a/b2/_cli/b2api.py +++ b/b2/_cli/b2api.py @@ -11,10 +11,12 @@ import os from typing import Optional -from b2sdk.account_info.sqlite_account_info import SqliteAccountInfo -from b2sdk.api_config import B2HttpApiConfig -from b2sdk.cache import AuthInfoCache -from b2sdk.v2 import B2Api +from b2sdk.v2 import ( + AuthInfoCache, + B2Api, + B2HttpApiConfig, + SqliteAccountInfo, +) from b2._cli.const import B2_USER_AGENT_APPEND_ENV_VAR diff --git a/b2/_cli/const.py b/b2/_cli/const.py index b88d24653..5ac8acc20 100644 --- a/b2/_cli/const.py +++ b/b2/_cli/const.py @@ -20,5 +20,7 @@ B2_SOURCE_SSE_C_KEY_B64_ENV_VAR = 'B2_SOURCE_SSE_C_KEY_B64' # Constants used in the B2 API +# TODO B2-47 move API related constants to b2sdk CREATE_BUCKET_TYPES = ('allPublic', 'allPrivate') +DEFAULT_MIN_PART_SIZE = 5 * 1000 * 1000 # 5MB LIST_FILE_NAMES_MAX_LIMIT = 10000 # https://www.backblaze.com/b2/docs/b2_list_file_names.html diff --git a/b2/_utils/__init__.py b/b2/_utils/__init__.py new file mode 100644 index 000000000..1bc414dca --- /dev/null +++ b/b2/_utils/__init__.py @@ -0,0 +1,9 @@ +###################################################################### +# +# File: b2/_utils/__init__.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### diff --git a/b2/_utils/filesystem.py b/b2/_utils/filesystem.py new file mode 100644 index 000000000..8e529bda6 --- /dev/null +++ b/b2/_utils/filesystem.py @@ -0,0 +1,20 @@ +###################################################################### +# +# File: b2/_utils/filesystem.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import stat +from pathlib import Path + + +def points_to_fifo(path: Path) -> bool: + path = path.resolve() + try: + + return stat.S_ISFIFO(path.stat().st_mode) + except OSError: + return False diff --git a/b2/console_tool.py b/b2/console_tool.py index b08a2f0f1..a230eb216 100644 --- a/b2/console_tool.py +++ b/b2/console_tool.py @@ -33,11 +33,11 @@ import threading import time import unicodedata -from abc import ABCMeta, abstractclassmethod +from abc import ABCMeta, abstractmethod from concurrent.futures import Executor, Future, ThreadPoolExecutor from contextlib import suppress from enum import Enum -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, BinaryIO, Dict, List, Optional, Tuple import argcomplete import b2sdk @@ -115,8 +115,10 @@ B2_SOURCE_SSE_C_KEY_B64_ENV_VAR, B2_USER_AGENT_APPEND_ENV_VAR, CREATE_BUCKET_TYPES, + DEFAULT_MIN_PART_SIZE, ) from b2._cli.shell import detect_shell +from b2._utils.filesystem import points_to_fifo from b2.arg_parser import ( ArgumentParser, parse_comma_separated_list, @@ -506,6 +508,7 @@ class UploadModeMixin(Described): @classmethod def _setup_parser(cls, parser): parser.add_argument('--incrementalMode', action='store_true') + super()._setup_parser(parser) # noqa @staticmethod def _get_upload_mode_from_args(args): @@ -514,6 +517,21 @@ def _get_upload_mode_from_args(args): return UploadMode.FULL +class ProgressMixin(Described): + """ + If the ``tqdm`` library is installed, progress bar is displayed + on stderr. Without it, simple text progress is printed. + Use ``--noProgress`` to disable progress reporting (marginally improves performance in some cases). + """ + + @classmethod + def _setup_parser(cls, parser): + parser.add_argument( + '--noProgress', action='store_true', help="progress will not be reported" + ) + super()._setup_parser(parser) # noqa + + class Command(Described): # Set to True for commands that receive sensitive information in arguments FORBID_LOGGING_ARGUMENTS = False @@ -934,7 +952,7 @@ def _setup_parser(cls, parser): info_group = parser.add_mutually_exclusive_group() - info_group.add_argument('--info', action='append', default=[]) + info_group.add_argument('--info', action='append') info_group.add_argument('--noInfo', action='store_true', default=False) parser.add_argument('sourceFileId') @@ -1279,16 +1297,13 @@ def _print_file_attribute(self, label, value): @B2.register_subcommand class DownloadFileById( - SourceSseMixin, WriteBufferSizeMixin, SkipHashVerificationMixin, MaxDownloadStreamsMixin, - DownloadCommand + ProgressMixin, SourceSseMixin, WriteBufferSizeMixin, SkipHashVerificationMixin, + MaxDownloadStreamsMixin, DownloadCommand ): """ Downloads the given file, and stores it in the given local file. - If the ``tqdm`` library is installed, progress bar is displayed - on stderr. Without it, simple text progress is printed. - Use ``--noProgress`` to disable progress reporting. - + {PROGRESSMIXIN} {SOURCESSEMIXIN} {WRITEBUFFERSIZEMIXIN} {SKIPHASHVERIFICATIONMIXIN} @@ -1301,7 +1316,6 @@ class DownloadFileById( @classmethod def _setup_parser(cls, parser): - parser.add_argument('--noProgress', action='store_true') parser.add_argument('--threads', type=int, default=10) parser.add_argument('fileId') parser.add_argument('localFileName') @@ -1311,7 +1325,7 @@ def run(self, args): progress_listener = make_progress_listener(args.localFileName, args.noProgress) encryption_setting = self._get_source_sse_setting(args) if args.threads: - # FIXME: This is using deprecated API. It should be be replaced when moving to b2sdk apiver 3. + # FIXME: This is using deprecated API. It should be replaced when moving to b2sdk apiver 3. # There is `max_download_workers` param in B2Api constructor for this. self.api.services.download_manager.set_thread_pool_size(args.threads) downloaded_file = self.api.download_file_by_id( @@ -1325,6 +1339,7 @@ def run(self, args): @B2.register_subcommand class DownloadFileByName( + ProgressMixin, SourceSseMixin, WriteBufferSizeMixin, SkipHashVerificationMixin, @@ -1334,10 +1349,7 @@ class DownloadFileByName( """ Downloads the given file, and stores it in the given local file. - If the ``tqdm`` library is installed, progress bar is displayed - on stderr. Without it, simple text progress is printed. - Use ``--noProgress`` to disable progress reporting. - + {PROGRESSMIXIN} {SOURCESSEMIXIN} {WRITEBUFFERSIZEMIXIN} {SKIPHASHVERIFICATIONMIXIN} @@ -1350,7 +1362,6 @@ class DownloadFileByName( @classmethod def _setup_parser(cls, parser): - parser.add_argument('--noProgress', action='store_true') parser.add_argument('--threads', type=int, default=10) parser.add_argument('bucketName').completer = bucket_name_completer parser.add_argument('b2FileName').completer = file_name_completer @@ -1359,7 +1370,7 @@ def _setup_parser(cls, parser): def run(self, args): if args.threads: - # FIXME: This is using deprecated API. It should be be replaced when moving to b2sdk apiver 3. + # FIXME: This is using deprecated API. It should be replaced when moving to b2sdk apiver 3. # There is `max_download_workers` param in B2Api constructor for this. self.api.services.download_manager.set_thread_pool_size(args.threads) bucket = self.api.get_bucket_by_name(args.bucketName) @@ -2379,7 +2390,7 @@ def run(self, args): upload_threads = args.uploadThreads download_threads = args.downloadThreads - # FIXME: This is using deprecated API. It should be be replaced when moving to b2sdk apiver 3. + # FIXME: This is using deprecated API. It should be replaced when moving to b2sdk apiver 3. # There are `max_X_workers` params in B2Api constructor for this. self.api.services.upload_manager.set_thread_pool_size(upload_threads) self.api.services.download_manager.set_thread_pool_size(download_threads) @@ -2608,43 +2619,252 @@ def run(self, args): return 0 -@B2.register_subcommand -class UploadFile( - DestinationSseMixin, LegalHoldMixin, FileRetentionSettingMixin, UploadModeMixin, Command +class MinPartSizeMixin(Described): + """ + By default, the file is broken into many parts to maximize upload parallelism and increase speed. + Setting ``--minPartSize`` controls the minimal upload file part size. + Part size must be in 5MB to 5GB range. + Reference: ``_ + """ + + @classmethod + def _setup_parser(cls, parser): + parser.add_argument( + '--minPartSize', + type=int, + help="minimum part size in bytes", + default=None, + ) + super()._setup_parser(parser) # noqa + + +class UploadFileMixin( + MinPartSizeMixin, + ProgressMixin, + DestinationSseMixin, + LegalHoldMixin, + FileRetentionSettingMixin, + metaclass=ABCMeta ): """ - Uploads one file to the given bucket. Uploads the contents - of the local file, and assigns the given name to the B2 file, + Content type is optional. + If not set, it will be guessed. + + The maximum number of upload threads to use to upload parts of a large file is specified by ``--threads``. + It has no effect on "small" files (under 200MB as of writing this). + Default is 10. + + Each fileInfo is of the form ``a=b``. + """ + + @classmethod + def _setup_parser(cls, parser): + parser.add_argument( + '--quiet', action='store_true', help="prevents printing any information to stdout" + ) + parser.add_argument( + '--contentType', + help="MIME type of the file being uploaded. If not set it will be guessed." + ) + parser.add_argument( + '--sha1', help="SHA-1 of the data being uploaded for verifying file integrity" + ) + parser.add_argument( + '--threads', type=int, default=10, help="number of threads used for the operation" + ) + parser.add_argument('--cache-control', default=None) + parser.add_argument( + '--info', + action='append', + default=[], + help= + "additional file info to be stored with the file. Can be used multiple times for different information." + ) + parser.add_argument( + '--custom-upload-timestamp', + type=int, + help="overrides object creation date. Expressed as a number of milliseconds since epoch." + ) + parser.add_argument( + 'bucketName', help="name of the bucket where the file will be stored" + ).completer = bucket_name_completer + parser.add_argument('localFilePath', help="path of the local file or stream to be uploaded") + parser.add_argument('b2FileName', help="name file will be given when stored in B2") + + super()._setup_parser(parser) # add parameters from the mixins + + def run(self, args): + + # FIXME: This is using deprecated API. It should be replaced when moving to b2sdk apiver 3. + # There is `max_upload_workers` param in B2Api constructor for this. + self.api.services.upload_manager.set_thread_pool_size(args.threads) + + upload_kwargs = self.get_execute_kwargs(args) + file_info = self.execute_operation(**upload_kwargs) + if not args.quiet: + bucket = upload_kwargs["bucket"] + self._print("URL by file name: " + bucket.get_download_url(file_info.file_name)) + self._print("URL by fileId: " + self.api.get_download_url_for_fileid(file_info.id_)) + self._print_json(file_info) + return 0 + + def get_execute_kwargs(self, args) -> dict: + file_infos = self._parse_file_infos(args.info) + + if SRC_LAST_MODIFIED_MILLIS not in file_infos and os.path.exists(args.localFilePath): + try: + mtime = os.path.getmtime(args.localFilePath) + except OSError: + if not points_to_fifo(pathlib.Path(args.localFilePath)): + self._print_stderr( + "WARNING: Unable to determine file modification timestamp. " + f"{SRC_LAST_MODIFIED_MILLIS!r} file info won't be set." + ) + else: + file_infos[SRC_LAST_MODIFIED_MILLIS] = str(int(mtime * 1000)) + + return { + "bucket": self.api.get_bucket_by_name(args.bucketName), + "cache_control": args.cache_control, + "content_type": args.contentType, + "custom_upload_timestamp": args.custom_upload_timestamp, + "encryption": self._get_destination_sse_setting(args), + "file_info": file_infos, + "file_name": args.b2FileName, + "file_retention": self._get_file_retention_setting(args), + "legal_hold": self._get_legal_hold_setting(args), + "local_file": args.localFilePath, + "min_part_size": args.minPartSize, + "progress_listener": make_progress_listener(args.localFilePath, args.noProgress), + "sha1_sum": args.sha1, + "threads": args.threads, + } + + @abstractmethod + def execute_operation(self, **kwargs) -> 'b2sdk.file_version.FileVersion': + raise NotImplementedError + + def upload_file_kwargs_to_unbound_upload(self, **kwargs): + """ + Translate upload_file kwargs to unbound_upload equivalents + """ + kwargs["large_file_sha1"] = kwargs.pop("sha1_sum", None) + kwargs["buffers_count"] = kwargs["threads"] + 1 + kwargs["read_size"] = kwargs["min_part_size"] or DEFAULT_MIN_PART_SIZE + return kwargs + + def get_input_stream(self, filename: str) -> 'str | int | io.BinaryIO': + """Get input stream IF filename points to a FIFO or stdin.""" + if filename == "-": + if os.path.exists('-'): + self._print_stderr( + "WARNING: Filename `-` won't be supported in the future and will be treated as stdin alias." + ) + else: + return sys.stdin.buffer if platform.system() == "Windows" else sys.stdin.fileno() + elif points_to_fifo(pathlib.Path(filename)): + return filename + + raise self.NotAnInputStream() + + def file_identifier_to_read_stream( + self, file_id: 'str | int | BinaryIO', buffering + ) -> BinaryIO: + if isinstance(file_id, (str, int)): + return open( + file_id, + mode="rb", + closefd=not isinstance(file_id, int), + buffering=buffering, + ) + return file_id + + class NotAnInputStream(Exception): + pass + + +@B2.register_subcommand +class UploadFile(UploadFileMixin, UploadModeMixin, Command): + """ + Uploads one file to the given bucket. + + Uploads the contents of the local file, and assigns the given name to the B2 file, possibly setting options like server-side encryption and retention. - {FILE_RETENTION_COMPATIBILITY_WARNING} + A FIFO file (such as named pipe) can be given instead of regular file. By default, upload_file will compute the sha1 checksum of the file to be uploaded. But, if you already have it, you can provide it on the command line to save a little time. - Content type is optional. If not set, it will be set based on the - file extension. + {FILE_RETENTION_COMPATIBILITY_WARNING} + {UPLOADFILEMIXIN} + {MINPARTSIZEMIXIN} + {PROGRESSMIXIN} + {DESTINATIONSSEMIXIN} + {FILERETENTIONSETTINGMIXIN} + {LEGALHOLDMIXIN} + {UPLOADMODEMIXIN} + + The ``--custom-upload-timestamp``, in milliseconds-since-epoch, can be used + to artificially change the upload timestamp of the file for the purpose + of preserving retention policies after migration of data from other storage. + The access to this feature is restricted - if you really need it, you'll + need to contact customer support to enable it temporarily for your account. - By default, the file is broken into as many parts as possible to - maximize upload parallelism and increase speed. The minimum that - B2 allows is 100MB. Setting ``--minPartSize`` to a larger value will - reduce the number of parts uploaded when uploading a large file. + Requires capability: - The maximum number of upload threads to use to upload parts of a large file - is specified by ``--threads``. It has no effect on small files (under 200MB). - Default is 10. + - **writeFiles** + """ - If the ``tqdm`` library is installed, progress bar is displayed - on stderr. Without it, simple text progress is printed. - Use ``--noProgress`` to disable progress reporting. + def get_execute_kwargs(self, args) -> dict: + kwargs = super().get_execute_kwargs(args) + kwargs["upload_mode"] = self._get_upload_mode_from_args(args) + return kwargs - Each fileInfo is of the form ``a=b``. + def execute_operation(self, local_file, bucket, threads, **kwargs): + try: + input_stream = self.get_input_stream(local_file) + except self.NotAnInputStream: # it is a regular file + file_version = bucket.upload_local_file(local_file=local_file, **kwargs) + else: + if kwargs.pop("upload_mode", None) != UploadMode.FULL: + self._print_stderr( + "WARNING: Ignoring upload mode setting as we are uploading a stream." + ) + kwargs = self.upload_file_kwargs_to_unbound_upload(threads=threads, **kwargs) + del kwargs["threads"] + input_stream = self.file_identifier_to_read_stream( + input_stream, kwargs["min_part_size"] or DEFAULT_MIN_PART_SIZE + ) + with input_stream: + file_version = bucket.upload_unbound_stream(read_only_object=input_stream, **kwargs) + return file_version + + +@B2.register_subcommand +class UploadUnboundStream(UploadFileMixin, Command): + """ + Uploads an unbound stream to the given bucket. + + Uploads the contents of the unbound stream such as stdin or named pipe, + and assigns the given name to the resulting B2 file. + + {FILE_RETENTION_COMPATIBILITY_WARNING} + {UPLOADFILEMIXIN} + {MINPARTSIZEMIXIN} + + As opposed to ``b2 upload-file``, ``b2 upload-unbound-stream`` cannot choose optimal `partSize` on its own. + So on memory constrained system it is best to use ``--partSize`` option to set it manually. + During upload of unbound stream ``--partSize`` as well as ``--threads`` determine the amount of memory used. + The maximum memory use for the upload buffers can be estimated at ``partSize * threads``, that is ~1GB by default. + What is more, B2 Large File may consist of at most 10,000 parts, so ``minPartSize`` should be adjusted accordingly, + if you expect the stream to be larger than 50GB. + {PROGRESSMIXIN} {DESTINATIONSSEMIXIN} {FILERETENTIONSETTINGMIXIN} {LEGALHOLDMIXIN} - {UPLOADMODEMIXIN} The ``--custom-upload-timestamp``, in milliseconds-since-epoch, can be used to artificially change the upload timestamp of the file for the purpose @@ -2659,57 +2879,47 @@ class UploadFile( @classmethod def _setup_parser(cls, parser): - parser.add_argument('--noProgress', action='store_true') - parser.add_argument('--quiet', action='store_true') - parser.add_argument('--contentType') - parser.add_argument('--minPartSize', type=int) - parser.add_argument('--sha1') - parser.add_argument('--threads', type=int, default=10) - parser.add_argument('--cache-control', default=None) - parser.add_argument('--info', action='append', default=[]) - parser.add_argument('--custom-upload-timestamp', type=int) - parser.add_argument('bucketName').completer = bucket_name_completer - parser.add_argument('localFilePath') - parser.add_argument('b2FileName') - - super()._setup_parser(parser) # add parameters from the mixins + parser.add_argument( + '--partSize', + type=int, + default=None, + help=("part size in bytes. Must be in range of "), + ) + parser.add_argument( + '--unusedBufferTimeoutSeconds', + type=float, + default=3600.0, + help=( + "maximum time in seconds that not a single part may sit in the queue," + " waiting to be uploaded, before an error is returned" + ), + ) + super()._setup_parser(parser) - def run(self, args): - file_infos = self._parse_file_infos(args.info) + def get_execute_kwargs(self, args) -> dict: + kwargs = super().get_execute_kwargs(args) + kwargs = self.upload_file_kwargs_to_unbound_upload(**kwargs) + kwargs["recommended_upload_part_size"] = args.partSize + kwargs["unused_buffer_timeout_seconds"] = args.unusedBufferTimeoutSeconds + return kwargs - if SRC_LAST_MODIFIED_MILLIS not in file_infos: - file_infos[SRC_LAST_MODIFIED_MILLIS] = str( - int(os.path.getmtime(args.localFilePath) * 1000) + def execute_operation(self, local_file, bucket, threads, **kwargs): + try: + input_stream = self.get_input_stream(local_file) + except self.NotAnInputStream: # it is a regular file + self._print_stderr( + "WARNING: You are using a stream upload command to upload a regular file. " + "While it will work, it is inefficient. " + "Use of upload-file command is recommended." ) + input_stream = local_file - # FIXME: This is using deprecated API. It should be be replaced when moving to b2sdk apiver 3. - # There is `max_upload_workers` param in B2Api constructor for this. - self.api.services.upload_manager.set_thread_pool_size(args.threads) - - bucket = self.api.get_bucket_by_name(args.bucketName) - encryption_setting = self._get_destination_sse_setting(args) - legal_hold = self._get_legal_hold_setting(args) - file_retention = self._get_file_retention_setting(args) - file_info = bucket.upload_local_file( - local_file=args.localFilePath, - file_name=args.b2FileName, - content_type=args.contentType, - file_infos=file_infos, - sha1_sum=args.sha1, - min_part_size=args.minPartSize, - progress_listener=make_progress_listener(args.localFilePath, args.noProgress), - encryption=encryption_setting, - file_retention=file_retention, - legal_hold=legal_hold, - upload_mode=self._get_upload_mode_from_args(args), - custom_upload_timestamp=args.custom_upload_timestamp, - cache_control=args.cache_control, + input_stream = self.file_identifier_to_read_stream( + input_stream, kwargs["min_part_size"] or DEFAULT_MIN_PART_SIZE ) - if not args.quiet: - self._print("URL by file name: " + bucket.get_download_url(args.b2FileName)) - self._print("URL by fileId: " + self.api.get_download_url_for_fileid(file_info.id_)) - self._print_json(file_info) - return 0 + with input_stream: + file_version = bucket.upload_unbound_stream(read_only_object=input_stream, **kwargs) + return file_version @B2.register_subcommand @@ -2914,7 +3124,8 @@ def alter_rule_by_name(cls, bucket: Bucket, name: str) -> Tuple[bool, bool]: ) return found, altered - @abstractclassmethod + @classmethod + @abstractmethod def alter_one_rule(cls, rule: ReplicationRule) -> Optional[ReplicationRule]: """ return None to delete a rule """ pass diff --git a/test/helpers.py b/test/helpers.py new file mode 100644 index 000000000..5b1181a02 --- /dev/null +++ b/test/helpers.py @@ -0,0 +1,19 @@ +###################################################################### +# +# File: test/helpers.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import platform + +import pytest + + +def skip_on_windows(*args, reason='Not supported on Windows', **kwargs): + return pytest.mark.skipif( + platform.system() == 'Windows', + reason=reason, + )(*args, **kwargs) diff --git a/test/integration/conftest.py b/test/integration/conftest.py index e8c84f871..c36e62a0b 100644 --- a/test/integration/conftest.py +++ b/test/integration/conftest.py @@ -9,6 +9,8 @@ ###################################################################### import contextlib +import os +import subprocess import sys from os import environ, path from tempfile import TemporaryDirectory @@ -156,6 +158,68 @@ def b2_tool(global_b2_tool): SECRET_FIXTURES = {'application_key', 'application_key_id'} +@pytest.fixture(scope="session") +def homedir(tmp_path_factory): + yield tmp_path_factory.mktemp("test_homedir") + + +@pytest.fixture(scope="session") +def b2_in_path(tmp_path_factory): + """ + Create a dummy b2 executable in a temporary directory and add it to PATH. + + This allows us to test the b2 command from shell level even if tested `b2` package was not installed. + """ + + tempdir = tmp_path_factory.mktemp("temp_bin") + temp_executable = tempdir / "b2" + with open(temp_executable, "w") as f: + f.write( + f"#!{sys.executable}\n" + "import sys\n" + f"sys.path.insert(0, {os.getcwd()!r})\n" # ensure relative imports work even if command is run in different directory + "from b2.console_tool import main\n" + "main()\n" + ) + + temp_executable.chmod(0o700) + + original_path = os.environ["PATH"] + new_path = f"{tempdir}:{original_path}" + yield new_path + + +@pytest.fixture(scope="module") +def env(b2_in_path, homedir, monkey_patch): + """Get ENV for running b2 command from shell level.""" + monkey_patch.setenv('PATH', b2_in_path) + monkey_patch.setenv('HOME', str(homedir)) + monkey_patch.setenv('SHELL', "/bin/bash") # fix for running under github actions + yield os.environ + + +@pytest.fixture +def bash_runner(env): + """Run command in bash shell.""" + + def run_command(command: str): + try: + return subprocess.run( + ["/bin/bash", "-c", command], + capture_output=True, + check=True, + env=env, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"Command {command!r} failed with exit code {e.returncode}") + print(e.stdout) + print(e.stderr, file=sys.stderr) + raise + + return run_command + + def pytest_collection_modifyitems(items): """ Add 'require_secrets' marker to all tests that use secrets. diff --git a/test/integration/helpers.py b/test/integration/helpers.py index 84c447a41..182694e71 100644 --- a/test/integration/helpers.py +++ b/test/integration/helpers.py @@ -28,7 +28,6 @@ from typing import List, Optional, Union import backoff -import pytest from b2sdk._v3.exception import BucketIdNotFound as v3BucketIdNotFound from b2sdk.v2 import ( ALL_CAPABILITIES, @@ -312,8 +311,8 @@ def run_command( environ['PYTHONPATH'] = '.' environ['PYTHONIOENCODING'] = 'utf-8' command = cmd.split(' ') - args = [str(arg) for arg in args] - command.extend(args or []) + args: List[str] = [str(arg) for arg in args] if args else [] + command.extend(args) print('Running:', ' '.join(command)) @@ -530,10 +529,3 @@ def set_file_mod_time_millis(path: Union[str, Path], time): def random_hex(length): return ''.join(random.choice('0123456789abcdef') for _ in range(length)) - - -def skip_on_windows(*args, reason='Not supported on Windows', **kwargs): - return pytest.mark.skipif( - platform.system() == 'Windows', - reason=reason, - )(*args, **kwargs) diff --git a/test/integration/test_autocomplete.py b/test/integration/test_autocomplete.py index 6944342c0..8b77d5e3b 100644 --- a/test/integration/test_autocomplete.py +++ b/test/integration/test_autocomplete.py @@ -8,9 +8,8 @@ # ###################################################################### -import os import sys -from test.integration.helpers import skip_on_windows +from test.helpers import skip_on_windows import pexpect import pytest @@ -27,11 +26,6 @@ """ -@pytest.fixture(scope="session") -def homedir(tmp_path_factory): - yield tmp_path_factory.mktemp("test_homedir") - - @pytest.fixture(scope="session") def bashrc(homedir): bashrc_path = (homedir / '.bashrc') @@ -39,40 +33,6 @@ def bashrc(homedir): yield bashrc_path -@pytest.fixture(scope="session") -def b2_in_path(tmp_path_factory): - """ - Create a dummy b2 executable in a temporary directory and add it to PATH. - - This allows us to test the b2 command line tool even if tested `b2` package was not installed. - """ - - tempdir = tmp_path_factory.mktemp("temp_bin") - temp_executable = tempdir / "b2" - with open(temp_executable, "w") as f: - f.write( - f"#!{sys.executable}\n" - "import sys\n" - f"sys.path.insert(0, {os.getcwd()!r})\n" # ensure relative imports work even if command is run in different directory - "from b2.console_tool import main\n" - "main()\n" - ) - - temp_executable.chmod(0o755) - - original_path = os.environ["PATH"] - new_path = f"{tempdir}:{original_path}" - yield new_path - - -@pytest.fixture(scope="module") -def env(b2_in_path, homedir, monkey_patch): - monkey_patch.setenv('PATH', b2_in_path) - monkey_patch.setenv('HOME', str(homedir)) - monkey_patch.setenv('SHELL', "/bin/bash") # fix for running under github actions - yield os.environ - - @pytest.fixture(scope="module") def autocomplete_installed(env, homedir, bashrc): shell = pexpect.spawn( diff --git a/test/integration/test_b2_command_line.py b/test/integration/test_b2_command_line.py index 353242067..d90449416 100644 --- a/test/integration/test_b2_command_line.py +++ b/test/integration/test_b2_command_line.py @@ -35,6 +35,7 @@ from b2.console_tool import current_time_millis +from ..helpers import skip_on_windows from .helpers import ( BUCKET_CREATED_AT_MILLIS, ONE_DAY_MILLIS, @@ -1975,7 +1976,7 @@ def file_lock_without_perms_test( def test_profile_switch(b2_tool): # this test could be unit, but it adds a lot of complexity because of - # necessarity to pass mocked B2Api to ConsoleTool; it's much easier to + # necessity to pass mocked B2Api to ConsoleTool; it's much easier to # just have an integration test instead MISSING_ACCOUNT_PATTERN = 'Missing account data' @@ -2579,3 +2580,23 @@ def test_cut(b2_tool, bucket_name): len(file_data), ) ) + + +@skip_on_windows +def test_upload_file__stdin_pipe_operator(bash_runner, b2_tool, bucket_name, request): + """Test upload-file from stdin using pipe operator.""" + content = request.node.name + run = bash_runner( + f'echo -n {content!r} | b2 upload-file {bucket_name} - {request.node.name}.txt' + ) + assert hashlib.sha1(content.encode()).hexdigest() in run.stdout + + +@skip_on_windows +def test_upload_unbound_stream__redirect_operator(bash_runner, b2_tool, bucket_name, request): + """Test upload-unbound-stream from stdin using redirect operator.""" + content = request.node.name + run = bash_runner( + f'b2 upload-unbound-stream {bucket_name} <(echo -n {content}) {request.node.name}.txt' + ) + assert hashlib.sha1(content.encode()).hexdigest() in run.stdout diff --git a/test/static/test_licenses.py b/test/static/test_licenses.py index 15f7706d5..670142e88 100644 --- a/test/static/test_licenses.py +++ b/test/static/test_licenses.py @@ -7,15 +7,29 @@ # License https://www.backblaze.com/using_b2_code.html # ###################################################################### - +from datetime import datetime from glob import glob from itertools import islice import pytest +FIXER_CMD = "python test/static/test_licenses.py" +LICENSE_HEADER_TMPL = """\ +###################################################################### +# +# File: {path} +# +# Copyright {year} Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +""" + -def test_files_headers(): - for file in glob('**/*.py', recursive=True): +def get_file_header_errors(file_path_glob: str) -> dict[str, str]: + failed_files = {} + for file in glob(file_path_glob, recursive=True): if file.startswith('build/'): # built files naturally have a different file path than source files continue @@ -25,6 +39,35 @@ def test_files_headers(): ) # glob('**/*.py') on Windows returns "b2\console_tool.py" (wrong slash) head = ''.join(islice(fd, 9)) if 'All Rights Reserved' not in head: - pytest.fail(f'Missing "All Rights Reserved" in the header in: {file}') - if file not in head: - pytest.fail(f'Wrong file name in the header in: {file}') + failed_files[file] = 'Missing "All Rights Reserved" in the header' + elif file not in head: + failed_files[file] = 'Wrong file name in the header' + return failed_files + + +def test_files_headers(): + failed_files = get_file_header_errors('**/*.py') + if failed_files: + error_msg = '; '.join(f'{path}:{error}' for path, error in failed_files.items()) + pytest.fail(f'Bad file headers in files (you may want to run {FIXER_CMD!r}): {error_msg}') + + +def insert_header(file_path: str): + with open(file_path, 'r+') as fd: + content = fd.read() + fd.seek(0) + fd.write(LICENSE_HEADER_TMPL.format( + path=file_path, + year=datetime.now().year, + )) + fd.write(content) + + +def _main(): + failed_files = get_file_header_errors('**/*.py') + for filepath in failed_files: + insert_header(filepath) + + +if __name__ == '__main__': + _main() diff --git a/test/unit/_utils/test_filesystem.py b/test/unit/_utils/test_filesystem.py new file mode 100644 index 000000000..5e2738cd3 --- /dev/null +++ b/test/unit/_utils/test_filesystem.py @@ -0,0 +1,32 @@ +###################################################################### +# +# File: test/unit/_utils/test_filesystem.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +from test.helpers import skip_on_windows + +from b2._utils.filesystem import points_to_fifo + + +def test_points_to_fifo__doesnt_exist(tmp_path): + non_existent = tmp_path / 'non-existent' + assert not non_existent.exists() + assert not points_to_fifo(non_existent) + + +@skip_on_windows +def test_points_to_fifo__named_pipe(tmp_path): + named_pipe = tmp_path / 'fifo' + os.mkfifo(str(named_pipe)) + assert points_to_fifo(named_pipe) + + +def test_points_to_fifo__regular_file(tmp_path): + regular_file = tmp_path / 'regular' + regular_file.write_text('hello') + assert not points_to_fifo(regular_file) diff --git a/test/unit/conftest.py b/test/unit/conftest.py new file mode 100644 index 000000000..e7894c900 --- /dev/null +++ b/test/unit/conftest.py @@ -0,0 +1,19 @@ +###################################################################### +# +# File: test/unit/conftest.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +from unittest import mock + +import pytest +from b2sdk.raw_api import REALM_URLS + + +@pytest.fixture(autouse=True, scope='session') +def mock_realm_urls(): + with mock.patch.dict(REALM_URLS, {'production': 'http://production.example.com'}): + yield diff --git a/test/unit/console_tool/__init__.py b/test/unit/console_tool/__init__.py new file mode 100644 index 000000000..677ed3dfd --- /dev/null +++ b/test/unit/console_tool/__init__.py @@ -0,0 +1,10 @@ +###################################################################### +# +# File: test/unit/console_tool/__init__.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +"""Tests for the console_tool commands.""" diff --git a/test/unit/console_tool/conftest.py b/test/unit/console_tool/conftest.py new file mode 100644 index 000000000..ddf5ca4e2 --- /dev/null +++ b/test/unit/console_tool/conftest.py @@ -0,0 +1,52 @@ +###################################################################### +# +# File: test/unit/console_tool/conftest.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +import sys +from test.unit.test_console_tool import BaseConsoleToolTest + +import pytest + + +class ConsoleToolTester(BaseConsoleToolTest): + def authorize(self): + self._authorize_account() + + def run(self, *args, **kwargs): + return self._run_command(*args, **kwargs) + + +@pytest.fixture +def b2_cli(): + cli_tester = ConsoleToolTester() + cli_tester.setUp() + yield cli_tester + cli_tester.tearDown() + + +@pytest.fixture +def authorized_b2_cli(b2_cli): + b2_cli.authorize() + yield b2_cli + + +@pytest.fixture +def bucket(b2_cli, authorized_b2_cli): + bucket_name = "my-bucket" + b2_cli.run(['create-bucket', bucket_name, 'allPublic'], expected_stdout='bucket_0\n') + yield bucket_name + + +@pytest.fixture +def mock_stdin(monkeypatch): + out_, in_ = os.pipe() + monkeypatch.setattr(sys, 'stdin', os.fdopen(out_)) + in_f = open(in_, 'w') + yield in_f + in_f.close() diff --git a/test/unit/console_tool/test_upload_file.py b/test/unit/console_tool/test_upload_file.py new file mode 100644 index 000000000..ff49a1af2 --- /dev/null +++ b/test/unit/console_tool/test_upload_file.py @@ -0,0 +1,119 @@ +###################################################################### +# +# File: test/unit/console_tool/test_upload_file.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +from test.helpers import skip_on_windows +from test.unit.helpers import run_in_background + +import b2 + + +def test_upload_file__file_info_src_last_modified_millis(b2_cli, bucket, tmpdir): + """Test upload_file supports manually specifying file info src_last_modified_millis""" + filename = 'file1.txt' + content = 'hello world' + local_file1 = tmpdir.join('file1.txt') + local_file1.write(content) + + expected_json = { + "action": "upload", + "contentSha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + "fileInfo": { + "src_last_modified_millis": "1" + }, + "fileName": filename, + "size": len(content), + } + b2_cli.run( + [ + 'upload-file', '--noProgress', '--info=src_last_modified_millis=1', 'my-bucket', + str(local_file1), 'file1.txt' + ], + expected_json_in_stdout=expected_json, + remove_version=True, + ) + + +@skip_on_windows +def test_upload_file__named_pipe(b2_cli, bucket, tmpdir): + """Test upload_file supports named pipes""" + filename = 'named_pipe.txt' + content = 'hello world' + local_file1 = tmpdir.join('file1.txt') + os.mkfifo(str(local_file1)) + writer = run_in_background( + local_file1.write, content + ) # writer will block until content is read + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + "contentType": "b2/x-auto", + "fileName": filename, + "size": len(content), + } + b2_cli.run( + ['upload-file', '--noProgress', 'my-bucket', + str(local_file1), filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + ) + writer.join() + + +def test_upload_file__hyphen_file_instead_of_stdin(b2_cli, bucket, tmpdir, monkeypatch): + """Test upload_file will upload file named `-` instead of stdin by default""" + # TODO remove this in v4 + assert b2.__version__ < '4', "`-` file upload should not be supported in next major version of CLI" + filename = 'stdin.txt' + content = "I'm very rare creature, a file named '-'" + monkeypatch.chdir(str(tmpdir)) + source_file = tmpdir.join('-') + source_file.write(content) + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "ab467567b98216a255f77aef08aa2c418073d974", + "fileName": filename, + "size": len(content), + } + b2_cli.run( + ['upload-file', '--noProgress', 'my-bucket', '-', filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + expected_stderr= + "WARNING: Filename `-` won't be supported in the future and will be treated as stdin alias.\n", + ) + + +def test_upload_file__stdin(b2_cli, bucket, tmpdir, mock_stdin): + """Test upload_file stdin alias support""" + content = "stdin input" + filename = 'stdin.txt' + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "2ce72aa159d1f190fddf295cc883f20c4787a751", + "fileName": filename, + "size": len(content), + } + mock_stdin.write(content) + mock_stdin.close() + + b2_cli.run( + ['upload-file', '--noProgress', 'my-bucket', '-', filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + ) diff --git a/test/unit/console_tool/test_upload_unbound_stream.py b/test/unit/console_tool/test_upload_unbound_stream.py new file mode 100644 index 000000000..9c9015325 --- /dev/null +++ b/test/unit/console_tool/test_upload_unbound_stream.py @@ -0,0 +1,128 @@ +###################################################################### +# +# File: test/unit/console_tool/test_upload_unbound_stream.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +from test.helpers import skip_on_windows +from test.unit.helpers import run_in_background + +from b2._cli.const import DEFAULT_MIN_PART_SIZE + + +@skip_on_windows +def test_upload_unbound_stream__named_pipe(b2_cli, bucket, tmpdir): + """Test upload_unbound_stream supports named pipes""" + filename = 'named_pipe.txt' + content = 'hello world' + fifo_file = tmpdir.join('fifo_file.txt') + os.mkfifo(str(fifo_file)) + writer = run_in_background(fifo_file.write, content) # writer will block until content is read + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + "fileName": filename, + "size": len(content), + } + b2_cli.run( + ['upload-unbound-stream', '--noProgress', 'my-bucket', + str(fifo_file), filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + ) + writer.join() + + +def test_upload_unbound_stream__stdin(b2_cli, bucket, tmpdir, mock_stdin): + """Test upload_unbound_stream stdin alias support""" + content = "stdin input" + filename = 'stdin.txt' + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "2ce72aa159d1f190fddf295cc883f20c4787a751", + "fileName": filename, + "size": len(content), + } + mock_stdin.write(content) + mock_stdin.close() + + b2_cli.run( + ['upload-unbound-stream', '--noProgress', 'my-bucket', '-', filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + ) + + +@skip_on_windows +def test_upload_unbound_stream__with_part_size_options(b2_cli, bucket, tmpdir, mock_stdin): + """Test upload_unbound_stream with part size options""" + part_size = DEFAULT_MIN_PART_SIZE + expected_size = part_size + 500 # has to be bigger to force multipart upload + + filename = 'named_pipe.txt' + fifo_file = tmpdir.join('fifo_file.txt') + os.mkfifo(str(fifo_file)) + writer = run_in_background( + fifo_file.write, "x" * expected_size + ) # writer will block until content is read + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "fileName": filename, + "size": expected_size, + } + + b2_cli.run( + [ + 'upload-unbound-stream', + '--minPartSize', + str(DEFAULT_MIN_PART_SIZE), + '--partSize', + str(part_size), + '--noProgress', + 'my-bucket', + str(fifo_file), + filename, + ], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + ) + writer.join() + + +def test_upload_unbound_stream__regular_file(b2_cli, bucket, tmpdir): + """Test upload_unbound_stream regular file support""" + content = "stdin input" + filename = 'file.txt' + filepath = tmpdir.join(filename) + filepath.write(content) + + expected_stdout = f'URL by file name: http://download.example.com/file/my-bucket/{filename}' + expected_json = { + "action": "upload", + "contentSha1": "2ce72aa159d1f190fddf295cc883f20c4787a751", + "fileName": filename, + "size": len(content), + } + + b2_cli.run( + ['upload-unbound-stream', '--noProgress', 'my-bucket', + str(filepath), filename], + expected_json_in_stdout=expected_json, + remove_version=True, + expected_part_of_stdout=expected_stdout, + expected_stderr= + "WARNING: You are using a stream upload command to upload a regular file. While it will work, it is inefficient. Use of upload-file command is recommended.\n", + ) diff --git a/test/unit/helpers.py b/test/unit/helpers.py new file mode 100644 index 000000000..62bfa3220 --- /dev/null +++ b/test/unit/helpers.py @@ -0,0 +1,16 @@ +###################################################################### +# +# File: test/unit/helpers.py +# +# Copyright 2023 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import threading + + +def run_in_background(func, *args, **kwargs) -> threading.Thread: + thread = threading.Thread(target=func, args=args, kwargs=kwargs) + thread.start() + return thread diff --git a/test/unit/test_console_tool.py b/test/unit/test_console_tool.py index 065bcdf35..6be2ad037 100644 --- a/test/unit/test_console_tool.py +++ b/test/unit/test_console_tool.py @@ -20,7 +20,6 @@ from b2sdk import v1 from b2sdk.v2 import ( ALL_CAPABILITIES, - REALM_URLS, B2Api, B2HttpApiConfig, ProgressReport, @@ -247,7 +246,6 @@ def _upload_multiple_files(cls, bucket): bucket.upload(data, 'c/test.tsv') -@mock.patch.dict(REALM_URLS, {'production': 'http://production.example.com'}) class TestConsoleTool(BaseConsoleToolTest): def test_authorize_with_bad_key(self): expected_stdout = ''' @@ -2407,7 +2405,6 @@ def test_passing_api_parameters(self): assert parallel_strategy.max_streams == params['--max-download-streams-per-file'] -@mock.patch.dict(REALM_URLS, {'production': 'http://production.example.com'}) class TestConsoleToolWithV1(BaseConsoleToolTest): """These tests use v1 interface to perform various setups before running CLI commands""" @@ -2483,7 +2480,6 @@ def test_list_unfinished_large_files_with_some(self): self._run_command(['list-unfinished-large-files', 'my-bucket'], expected_stdout, '', 0) -@mock.patch.dict(REALM_URLS, {'production': 'http://production.example.com'}) class TestRmConsoleTool(BaseConsoleToolTest): """ These tests replace default progress reporter of Rm class