diff --git a/doc/record_writer_options.md b/doc/record_writer_options.md index 807006ca..ff0ebcc5 100644 --- a/doc/record_writer_options.md +++ b/doc/record_writer_options.md @@ -12,6 +12,7 @@ Options for writing Riegeli/records files can be specified as a string: "zstd" (":" zstd_level)? | "snappy" (":" snappy_level)? | "window_log" ":" window_log | + "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") | "chunk_size" ":" chunk_size | "bucket_fraction" ":" bucket_fraction | "pad_to_block_boundary" (":" ("true" | "false" | "initially"))? | diff --git a/riegeli/chunk_encoding/BUILD b/riegeli/chunk_encoding/BUILD index 0eab7fc1..984e3243 100644 --- a/riegeli/chunk_encoding/BUILD +++ b/riegeli/chunk_encoding/BUILD @@ -120,6 +120,7 @@ cc_library( srcs = ["compressor.cc"], hdrs = ["compressor.h"], deps = [ + ":brotli_encoder_selection", ":compressor_options", ":constants", "//riegeli/base:arithmetic", @@ -129,7 +130,6 @@ cc_library( "//riegeli/base:object", "//riegeli/base:recycling_pool", "//riegeli/base:types", - "//riegeli/brotli:brotli_writer", "//riegeli/bytes:chain_writer", "//riegeli/bytes:writer", "//riegeli/snappy:snappy_writer", @@ -167,6 +167,25 @@ cc_library( ], ) +cc_library( + name = "brotli_encoder_selection", + srcs = ["brotli_encoder_selection.cc"], + hdrs = ["brotli_encoder_selection.h"], + deps = [ + ":compressor_options", + "//riegeli/base:assert", + "//riegeli/base:chain", + "//riegeli/base:initializer", + "//riegeli/base:recycling_pool", + "//riegeli/brotli:brotli_writer", + "//riegeli/bytes:chain_writer", + "//riegeli/bytes:null_writer", + "//riegeli/bytes:writer", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + ], +) + cc_library( name = "simple_encoder", srcs = ["simple_encoder.cc"], diff --git a/riegeli/chunk_encoding/brotli_encoder_selection.cc b/riegeli/chunk_encoding/brotli_encoder_selection.cc new file mode 100644 index 00000000..3de0b831 --- /dev/null +++ b/riegeli/chunk_encoding/brotli_encoder_selection.cc @@ -0,0 +1,62 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "riegeli/chunk_encoding/brotli_encoder_selection.h" + +#include + +#include "absl/base/attributes.h" +#include "absl/status/status.h" +#include "riegeli/base/assert.h" +#include "riegeli/base/chain.h" +#include "riegeli/base/maker.h" +#include "riegeli/base/recycling_pool.h" +#include "riegeli/brotli/brotli_writer.h" +#include "riegeli/bytes/chain_writer.h" +#include "riegeli/bytes/null_writer.h" +#include "riegeli/bytes/writer.h" +#include "riegeli/chunk_encoding/compressor_options.h" + +namespace riegeli { +namespace chunk_encoding_internal { + +ABSL_ATTRIBUTE_WEAK std::unique_ptr NewBrotliWriter( + Chain* compressed, const CompressorOptions& compressor_options, + ABSL_ATTRIBUTE_UNUSED const RecyclingPoolOptions& recycling_pool_options) { + switch (compressor_options.brotli_encoder()) { + case BrotliEncoder::kRBrotliOrCBrotli: + case BrotliEncoder::kCBrotli: + return NewCBrotliWriter(compressed, compressor_options); + case BrotliEncoder::kRBrotli: { + std::unique_ptr writer = std::make_unique(); + writer->Fail(absl::UnimplementedError("Rust Brotli not available")); + return writer; + } + } + RIEGELI_ASSERT_UNREACHABLE() + << "Unknown Brotli encoder: " + << static_cast(compressor_options.brotli_encoder()); +} + +std::unique_ptr NewCBrotliWriter( + Chain* compressed, const CompressorOptions& compressor_options) { + return std::make_unique>>( + riegeli::Maker(compressed), + BrotliWriterBase::Options() + .set_compression_level(compressor_options.compression_level()) + .set_window_log(compressor_options.brotli_window_log())); +} + +} // namespace chunk_encoding_internal +} // namespace riegeli diff --git a/riegeli/chunk_encoding/brotli_encoder_selection.h b/riegeli/chunk_encoding/brotli_encoder_selection.h new file mode 100644 index 00000000..8eef2f4b --- /dev/null +++ b/riegeli/chunk_encoding/brotli_encoder_selection.h @@ -0,0 +1,48 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_ +#define RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_ + +#include + +#include "riegeli/base/chain.h" +#include "riegeli/base/recycling_pool.h" +#include "riegeli/bytes/writer.h" +#include "riegeli/chunk_encoding/compressor_options.h" + +namespace riegeli { +namespace chunk_encoding_internal { + +// Creates a `Writer` which compresses data with Brotli and writes them to +// `compressed`. +// +// The encoder implementation is determined by +// `compressor_options.brotli_encoder()`. +// +// This is a weak function. Its default definition supports only C Brotli. +// It can be overridden to support also Rust Brotli. +std::unique_ptr NewBrotliWriter( + Chain* compressed, const CompressorOptions& compressor_options, + const RecyclingPoolOptions& recycling_pool_options); + +// Support for `NewBrotliWriter()`: uses C Brotli, ignores +// `compressor_options.brotli_encoder()`. +std::unique_ptr NewCBrotliWriter( + Chain* compressed, const CompressorOptions& compressor_options); + +} // namespace chunk_encoding_internal +} // namespace riegeli + +#endif // RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_ diff --git a/riegeli/chunk_encoding/compressor.cc b/riegeli/chunk_encoding/compressor.cc index 034f33ea..7645b810 100644 --- a/riegeli/chunk_encoding/compressor.cc +++ b/riegeli/chunk_encoding/compressor.cc @@ -27,9 +27,9 @@ #include "riegeli/base/maker.h" #include "riegeli/base/object.h" #include "riegeli/base/types.h" -#include "riegeli/brotli/brotli_writer.h" #include "riegeli/bytes/chain_writer.h" #include "riegeli/bytes/writer.h" +#include "riegeli/chunk_encoding/brotli_encoder_selection.h" #include "riegeli/chunk_encoding/compressor_options.h" #include "riegeli/chunk_encoding/constants.h" #include "riegeli/snappy/snappy_writer.h" @@ -64,11 +64,8 @@ inline void Compressor::Initialize() { writer_ = std::make_unique>(&compressed_); return; case CompressionType::kBrotli: - writer_ = std::make_unique>>( - riegeli::Maker(&compressed_), - BrotliWriterBase::Options() - .set_compression_level(compressor_options_.compression_level()) - .set_window_log(compressor_options_.brotli_window_log())); + writer_ = NewBrotliWriter(&compressed_, compressor_options_, + tuning_options_.recycling_pool_options()); return; case CompressionType::kZstd: writer_ = std::make_unique>>( diff --git a/riegeli/chunk_encoding/compressor_options.cc b/riegeli/chunk_encoding/compressor_options.cc index 332cf16f..408b245e 100644 --- a/riegeli/chunk_encoding/compressor_options.cc +++ b/riegeli/chunk_encoding/compressor_options.cc @@ -79,6 +79,8 @@ absl::Status CompressorOptions::FromString(absl::string_view text) { })); options_parser.AddOption("window_log", [](ValueParser& value_parser) { return true; }); + options_parser.AddOption("brotli_encoder", + [](ValueParser& value_parser) { return true; }); if (ABSL_PREDICT_FALSE(!options_parser.FromString(text))) { return options_parser.status(); } @@ -149,6 +151,13 @@ absl::Status CompressorOptions::FromString(absl::string_view text) { RIEGELI_ASSERT_UNREACHABLE() << "Unknown compression type: " << static_cast(compression_type_); }()); + options_parser.AddOption( + "brotli_encoder", + ValueParser::Enum( + {{"rbrotli_or_cbrotli", BrotliEncoder::kRBrotliOrCBrotli}, + {"cbrotli", BrotliEncoder::kCBrotli}, + {"rbrotli", BrotliEncoder::kRBrotli}}, + &brotli_encoder_)); if (ABSL_PREDICT_FALSE(!options_parser.FromString(text))) { return options_parser.status(); } @@ -157,7 +166,7 @@ absl::Status CompressorOptions::FromString(absl::string_view text) { int CompressorOptions::brotli_window_log() const { RIEGELI_ASSERT(compression_type_ == CompressionType::kBrotli) - << "Failed precodition of CompressorOptions::brotli_window_log(): " + << "Failed precondition of CompressorOptions::brotli_window_log(): " "compression type must be Brotli"; if (window_log_ == absl::nullopt) { return BrotliWriterBase::Options::kDefaultWindowLog; @@ -174,7 +183,7 @@ int CompressorOptions::brotli_window_log() const { absl::optional CompressorOptions::zstd_window_log() const { RIEGELI_ASSERT(compression_type_ == CompressionType::kZstd) - << "Failed precodition of CompressorOptions::zstd_window_log(): " + << "Failed precondition of CompressorOptions::zstd_window_log(): " "compression type must be Zstd"; if (window_log_ != absl::nullopt) { RIEGELI_ASSERT_GE(*window_log_, ZstdWriterBase::Options::kMinWindowLog) diff --git a/riegeli/chunk_encoding/compressor_options.h b/riegeli/chunk_encoding/compressor_options.h index 7b92d437..ce07a684 100644 --- a/riegeli/chunk_encoding/compressor_options.h +++ b/riegeli/chunk_encoding/compressor_options.h @@ -29,6 +29,16 @@ namespace riegeli { +// The implementation of the Brotli encoder to use. Experimental, meant for +// evaluation. Prefer to keep the default. +// +// Rust Brotli is currently not available in open sourced Riegeli. +enum class BrotliEncoder { + kRBrotliOrCBrotli, // Rust Brotli if available, C Brotli otherwise. Default. + kCBrotli, // C Brotli. + kRBrotli, // Rust Brotli if available, fail otherwise. +}; + class CompressorOptions { public: CompressorOptions() noexcept {} @@ -41,7 +51,8 @@ class CompressorOptions { // "brotli" (":" brotli_level)? | // "zstd" (":" zstd_level)? | // "snappy" (":" snappy_level)? | - // "window_log" ":" window_log + // "window_log" ":" window_log | + // "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") // brotli_level ::= integer in the range [0..11] (default 6) // zstd_level ::= integer in the range [-131072..22] (default 3) // snappy_level ::= integer in the range [1..2] (default 1) @@ -192,10 +203,31 @@ class CompressorOptions { // Precondition: `compression_type() == CompressionType::kZstd` absl::optional zstd_window_log() const; + // The implementation of the Brotli encoder to use. Experimental, meant for + // evaluation. Prefer to keep the default. + // + // This is ignored if `compression_type() != CompressionType::kBrotli`. + // + // If Rust Brotli is used, the interpretation of compression levels is + // slightly different (in particular compression levels smaller than 3 are + // equivalent to 3, and compression levels larger than 7 are equivalent to 7), + // and `window_log()` is ignored. + // + // Default: `BrotliEncoder::kRBrotliOrCBrotli`. + CompressorOptions& set_brotli_encoder(BrotliEncoder brotli_encoder) & { + brotli_encoder_ = brotli_encoder; + return *this; + } + CompressorOptions&& set_brotli_encoder(BrotliEncoder brotli_encoder) && { + return std::move(set_brotli_encoder(brotli_encoder)); + } + BrotliEncoder brotli_encoder() const { return brotli_encoder_; } + private: CompressionType compression_type_ = CompressionType::kBrotli; int compression_level_ = kDefaultBrotli; absl::optional window_log_; + BrotliEncoder brotli_encoder_ = BrotliEncoder::kRBrotliOrCBrotli; }; } // namespace riegeli diff --git a/riegeli/chunk_encoding/transpose_encoder.cc b/riegeli/chunk_encoding/transpose_encoder.cc index 47a47f21..d5fbb0d5 100644 --- a/riegeli/chunk_encoding/transpose_encoder.cc +++ b/riegeli/chunk_encoding/transpose_encoder.cc @@ -513,7 +513,7 @@ inline bool TransposeEncoder::AddBuffer( .set_recycling_pool_options(recycling_pool_options_)); } if (ABSL_PREDICT_FALSE(!bucket_compressor.writer().Write(buffer))) { - return Fail(bucket_compressor.status()); + return Fail(bucket_compressor.writer().status()); } return true; } diff --git a/riegeli/records/record_writer.cc b/riegeli/records/record_writer.cc index 475cecd4..441c96bf 100644 --- a/riegeli/records/record_writer.cc +++ b/riegeli/records/record_writer.cc @@ -129,6 +129,8 @@ absl::Status RecordWriterBase::Options::FromString(absl::string_view text) { options_parser.AddOption("zstd", ValueParser::CopyTo(&compressor_text)); options_parser.AddOption("snappy", ValueParser::CopyTo(&compressor_text)); options_parser.AddOption("window_log", ValueParser::CopyTo(&compressor_text)); + options_parser.AddOption("brotli_encoder", + ValueParser::CopyTo(&compressor_text)); options_parser.AddOption( "chunk_size", ValueParser::Or( diff --git a/riegeli/records/record_writer.h b/riegeli/records/record_writer.h index e63c5066..891b8467 100644 --- a/riegeli/records/record_writer.h +++ b/riegeli/records/record_writer.h @@ -87,6 +87,7 @@ class RecordWriterBase : public Object { // "zstd" (":" zstd_level)? | // "snappy" (":" snappy_level)? | // "window_log" ":" window_log | + // "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") | // "chunk_size" ":" chunk_size | // "bucket_fraction" ":" bucket_fraction | // "pad_to_block_boundary" (":" ("true" | "false" | "initially"))? |