Skip to content

Commit

Permalink
Add a mechanism to let Riegeli/records use the Rust implementation of the Brotli encoder instead of the C implementation.
Browse files Browse the repository at this point in the history

Rust Brotli is currently not available in open sourced Riegeli.

PiperOrigin-RevId: 665264284
  • Loading branch information
QrczakMK committed Aug 20, 2024
1 parent 74f536f commit 7252ada
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/record_writer_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Options for writing Riegeli/records files can be specified as a string:
"zstd" (":" zstd_level)? |
"snappy" (":" snappy_level)? |
"window_log" ":" window_log |
"brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") |
"chunk_size" ":" chunk_size |
"bucket_fraction" ":" bucket_fraction |
"pad_to_block_boundary" (":" ("true" | "false" | "initially"))? |
Expand Down
21 changes: 20 additions & 1 deletion riegeli/chunk_encoding/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ cc_library(
srcs = ["compressor.cc"],
hdrs = ["compressor.h"],
deps = [
":brotli_encoder_selection",
":compressor_options",
":constants",
"//riegeli/base:arithmetic",
Expand All @@ -129,7 +130,6 @@ cc_library(
"//riegeli/base:object",
"//riegeli/base:recycling_pool",
"//riegeli/base:types",
"//riegeli/brotli:brotli_writer",
"//riegeli/bytes:chain_writer",
"//riegeli/bytes:writer",
"//riegeli/snappy:snappy_writer",
Expand Down Expand Up @@ -167,6 +167,25 @@ cc_library(
],
)

# Factory functions which create the Brotli-compressing `Writer` used for chunk
# encoding. `NewBrotliWriter()` is defined as a weak function here so that
# another definition can override it; the default definition supports only
# C Brotli.
cc_library(
name = "brotli_encoder_selection",
srcs = ["brotli_encoder_selection.cc"],
hdrs = ["brotli_encoder_selection.h"],
deps = [
":compressor_options",
"//riegeli/base:assert",
"//riegeli/base:chain",
"//riegeli/base:initializer",
"//riegeli/base:recycling_pool",
"//riegeli/brotli:brotli_writer",
"//riegeli/bytes:chain_writer",
# Used for the already-failed `Writer` returned when Rust Brotli is requested.
"//riegeli/bytes:null_writer",
"//riegeli/bytes:writer",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
],
)

cc_library(
name = "simple_encoder",
srcs = ["simple_encoder.cc"],
Expand Down
62 changes: 62 additions & 0 deletions riegeli/chunk_encoding/brotli_encoder_selection.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "riegeli/chunk_encoding/brotli_encoder_selection.h"

#include <memory>

#include "absl/base/attributes.h"
#include "absl/status/status.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/chain.h"
#include "riegeli/base/maker.h"
#include "riegeli/base/recycling_pool.h"
#include "riegeli/brotli/brotli_writer.h"
#include "riegeli/bytes/chain_writer.h"
#include "riegeli/bytes/null_writer.h"
#include "riegeli/bytes/writer.h"
#include "riegeli/chunk_encoding/compressor_options.h"

namespace riegeli {
namespace chunk_encoding_internal {

// Default (weak) definition of `NewBrotliWriter()`, supporting only C Brotli.
//
// `BrotliEncoder::kRBrotliOrCBrotli` and `BrotliEncoder::kCBrotli` both yield
// a C Brotli writer. `BrotliEncoder::kRBrotli` yields a `Writer` which has
// already failed, because Rust Brotli is not available in this definition.
//
// `recycling_pool_options` is not used by this definition.
ABSL_ATTRIBUTE_WEAK std::unique_ptr<Writer> NewBrotliWriter(
    Chain* compressed, const CompressorOptions& compressor_options,
    ABSL_ATTRIBUTE_UNUSED const RecyclingPoolOptions& recycling_pool_options) {
  const BrotliEncoder requested_encoder = compressor_options.brotli_encoder();
  switch (requested_encoder) {
    case BrotliEncoder::kRBrotli: {
      // Rust Brotli was requested explicitly, with no fallback allowed:
      // report the problem through the status of the returned `Writer`.
      std::unique_ptr<Writer> failed_writer =
          std::make_unique<riegeli::NullWriter>();
      failed_writer->Fail(
          absl::UnimplementedError("Rust Brotli not available"));
      return failed_writer;
    }
    case BrotliEncoder::kCBrotli:
    case BrotliEncoder::kRBrotliOrCBrotli:
      // Either C Brotli was requested, or it is the permitted fallback.
      return NewCBrotliWriter(compressed, compressor_options);
  }
  RIEGELI_ASSERT_UNREACHABLE()
      << "Unknown Brotli encoder: " << static_cast<int>(requested_encoder);
}

// Creates a C Brotli `Writer` which writes compressed data to `compressed`.
//
// The compression level and the window log are taken from
// `compressor_options`; `compressor_options.brotli_encoder()` is not
// consulted.
std::unique_ptr<Writer> NewCBrotliWriter(
    Chain* compressed, const CompressorOptions& compressor_options) {
  using CBrotliWriter = BrotliWriter<ChainWriter<>>;
  const int compression_level = compressor_options.compression_level();
  const int window_log = compressor_options.brotli_window_log();
  return std::make_unique<CBrotliWriter>(
      riegeli::Maker(compressed),
      BrotliWriterBase::Options()
          .set_compression_level(compression_level)
          .set_window_log(window_log));
}

} // namespace chunk_encoding_internal
} // namespace riegeli
48 changes: 48 additions & 0 deletions riegeli/chunk_encoding/brotli_encoder_selection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_
#define RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_

#include <memory>

#include "riegeli/base/chain.h"
#include "riegeli/base/recycling_pool.h"
#include "riegeli/bytes/writer.h"
#include "riegeli/chunk_encoding/compressor_options.h"

namespace riegeli {
namespace chunk_encoding_internal {

// Creates a `Writer` which compresses data with Brotli and writes the
// compressed output to `compressed`.
//
// The encoder implementation is determined by
// `compressor_options.brotli_encoder()`.
//
// This is a weak function. Its default definition supports only C Brotli:
// `BrotliEncoder::kRBrotliOrCBrotli` and `BrotliEncoder::kCBrotli` delegate to
// `NewCBrotliWriter()`, while `BrotliEncoder::kRBrotli` yields a `Writer`
// which has already failed with `absl::UnimplementedError()`. The default
// definition does not use `recycling_pool_options`.
//
// It can be overridden to support also Rust Brotli.
std::unique_ptr<Writer> NewBrotliWriter(
Chain* compressed, const CompressorOptions& compressor_options,
const RecyclingPoolOptions& recycling_pool_options);

// Support for `NewBrotliWriter()`: uses C Brotli, ignores
// `compressor_options.brotli_encoder()`.
//
// The compression level and the window log are taken from
// `compressor_options.compression_level()` and
// `compressor_options.brotli_window_log()`. Compressed data are written to
// `compressed`.
std::unique_ptr<Writer> NewCBrotliWriter(
Chain* compressed, const CompressorOptions& compressor_options);

} // namespace chunk_encoding_internal
} // namespace riegeli

#endif // RIEGELI_CHUNK_ENCODING_BROTLI_ENCODER_SELECTION_H_
9 changes: 3 additions & 6 deletions riegeli/chunk_encoding/compressor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
#include "riegeli/base/maker.h"
#include "riegeli/base/object.h"
#include "riegeli/base/types.h"
#include "riegeli/brotli/brotli_writer.h"
#include "riegeli/bytes/chain_writer.h"
#include "riegeli/bytes/writer.h"
#include "riegeli/chunk_encoding/brotli_encoder_selection.h"
#include "riegeli/chunk_encoding/compressor_options.h"
#include "riegeli/chunk_encoding/constants.h"
#include "riegeli/snappy/snappy_writer.h"
Expand Down Expand Up @@ -64,11 +64,8 @@ inline void Compressor::Initialize() {
writer_ = std::make_unique<ChainWriter<>>(&compressed_);
return;
case CompressionType::kBrotli:
writer_ = std::make_unique<BrotliWriter<ChainWriter<>>>(
riegeli::Maker(&compressed_),
BrotliWriterBase::Options()
.set_compression_level(compressor_options_.compression_level())
.set_window_log(compressor_options_.brotli_window_log()));
writer_ = NewBrotliWriter(&compressed_, compressor_options_,
tuning_options_.recycling_pool_options());
return;
case CompressionType::kZstd:
writer_ = std::make_unique<ZstdWriter<ChainWriter<>>>(
Expand Down
13 changes: 11 additions & 2 deletions riegeli/chunk_encoding/compressor_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ absl::Status CompressorOptions::FromString(absl::string_view text) {
}));
options_parser.AddOption("window_log",
[](ValueParser& value_parser) { return true; });
options_parser.AddOption("brotli_encoder",
[](ValueParser& value_parser) { return true; });
if (ABSL_PREDICT_FALSE(!options_parser.FromString(text))) {
return options_parser.status();
}
Expand Down Expand Up @@ -149,6 +151,13 @@ absl::Status CompressorOptions::FromString(absl::string_view text) {
RIEGELI_ASSERT_UNREACHABLE() << "Unknown compression type: "
<< static_cast<unsigned>(compression_type_);
}());
options_parser.AddOption(
"brotli_encoder",
ValueParser::Enum(
{{"rbrotli_or_cbrotli", BrotliEncoder::kRBrotliOrCBrotli},
{"cbrotli", BrotliEncoder::kCBrotli},
{"rbrotli", BrotliEncoder::kRBrotli}},
&brotli_encoder_));
if (ABSL_PREDICT_FALSE(!options_parser.FromString(text))) {
return options_parser.status();
}
Expand All @@ -157,7 +166,7 @@ absl::Status CompressorOptions::FromString(absl::string_view text) {

int CompressorOptions::brotli_window_log() const {
RIEGELI_ASSERT(compression_type_ == CompressionType::kBrotli)
<< "Failed precodition of CompressorOptions::brotli_window_log(): "
<< "Failed precondition of CompressorOptions::brotli_window_log(): "
"compression type must be Brotli";
if (window_log_ == absl::nullopt) {
return BrotliWriterBase::Options::kDefaultWindowLog;
Expand All @@ -174,7 +183,7 @@ int CompressorOptions::brotli_window_log() const {

absl::optional<int> CompressorOptions::zstd_window_log() const {
RIEGELI_ASSERT(compression_type_ == CompressionType::kZstd)
<< "Failed precodition of CompressorOptions::zstd_window_log(): "
<< "Failed precondition of CompressorOptions::zstd_window_log(): "
"compression type must be Zstd";
if (window_log_ != absl::nullopt) {
RIEGELI_ASSERT_GE(*window_log_, ZstdWriterBase::Options::kMinWindowLog)
Expand Down
34 changes: 33 additions & 1 deletion riegeli/chunk_encoding/compressor_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@

namespace riegeli {

// Selects which implementation of the Brotli encoder is used. Experimental,
// meant for evaluation. Prefer to keep the default.
//
// Rust Brotli is currently not available in open sourced Riegeli.
enum class BrotliEncoder {
  // Rust Brotli if available, C Brotli otherwise. Default.
  kRBrotliOrCBrotli,
  // C Brotli.
  kCBrotli,
  // Rust Brotli if available, fail otherwise.
  kRBrotli,
};

class CompressorOptions {
public:
CompressorOptions() noexcept {}
Expand All @@ -41,7 +51,8 @@ class CompressorOptions {
// "brotli" (":" brotli_level)? |
// "zstd" (":" zstd_level)? |
// "snappy" (":" snappy_level)? |
// "window_log" ":" window_log
// "window_log" ":" window_log |
// "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli")
// brotli_level ::= integer in the range [0..11] (default 6)
// zstd_level ::= integer in the range [-131072..22] (default 3)
// snappy_level ::= integer in the range [1..2] (default 1)
Expand Down Expand Up @@ -192,10 +203,31 @@ class CompressorOptions {
// Precondition: `compression_type() == CompressionType::kZstd`
absl::optional<int> zstd_window_log() const;

// The implementation of the Brotli encoder to use. Experimental, meant for
// evaluation. Prefer to keep the default.
//
// This is ignored if `compression_type() != CompressionType::kBrotli`.
//
// If Rust Brotli is used, the interpretation of compression levels is
// slightly different (in particular compression levels smaller than 3 are
// equivalent to 3, and compression levels larger than 7 are equivalent to 7),
// and `window_log()` is ignored.
//
// Default: `BrotliEncoder::kRBrotliOrCBrotli`.
//
// Returns `*this`, so that calls to setters can be chained.
CompressorOptions& set_brotli_encoder(BrotliEncoder brotli_encoder) & {
brotli_encoder_ = brotli_encoder;
return *this;
}
// Overload for an rvalue `CompressorOptions`: stores `brotli_encoder` via the
// lvalue overload and returns `*this` as an rvalue, so that setters can be
// chained on a temporary.
CompressorOptions&& set_brotli_encoder(BrotliEncoder brotli_encoder) && {
return std::move(set_brotli_encoder(brotli_encoder));
}
// Returns the Brotli encoder implementation stored in these options.
BrotliEncoder brotli_encoder() const { return brotli_encoder_; }

private:
CompressionType compression_type_ = CompressionType::kBrotli;
int compression_level_ = kDefaultBrotli;
absl::optional<int> window_log_;
BrotliEncoder brotli_encoder_ = BrotliEncoder::kRBrotliOrCBrotli;
};

} // namespace riegeli
Expand Down
2 changes: 1 addition & 1 deletion riegeli/chunk_encoding/transpose_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ inline bool TransposeEncoder::AddBuffer(
.set_recycling_pool_options(recycling_pool_options_));
}
if (ABSL_PREDICT_FALSE(!bucket_compressor.writer().Write(buffer))) {
return Fail(bucket_compressor.status());
return Fail(bucket_compressor.writer().status());
}
return true;
}
Expand Down
2 changes: 2 additions & 0 deletions riegeli/records/record_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ absl::Status RecordWriterBase::Options::FromString(absl::string_view text) {
options_parser.AddOption("zstd", ValueParser::CopyTo(&compressor_text));
options_parser.AddOption("snappy", ValueParser::CopyTo(&compressor_text));
options_parser.AddOption("window_log", ValueParser::CopyTo(&compressor_text));
options_parser.AddOption("brotli_encoder",
ValueParser::CopyTo(&compressor_text));
options_parser.AddOption(
"chunk_size",
ValueParser::Or(
Expand Down
1 change: 1 addition & 0 deletions riegeli/records/record_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class RecordWriterBase : public Object {
// "zstd" (":" zstd_level)? |
// "snappy" (":" snappy_level)? |
// "window_log" ":" window_log |
// "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") |
// "chunk_size" ":" chunk_size |
// "bucket_fraction" ":" bucket_fraction |
// "pad_to_block_boundary" (":" ("true" | "false" | "initially"))? |
Expand Down

0 comments on commit 7252ada

Please sign in to comment.