Skip to content

Commit

Permalink
iceberg/tests: test case that uses generated valid_schema()
Browse files Browse the repository at this point in the history
Adds a simple test for serializing manifest files with a DataFileWriter
and reading them back with a DataFileReader. This will ultimately be
what we'll use to serialize manifests, since the DataFileWriter is what
will write additional metadata[1][2] (like the Iceberg schema).

[1] https://github.com/redpanda-data/avro/blob/1410e79f9df61669c2d52f6d0643e6c35156e615/lang/c%2B%2B/impl/DataFile.cc#L246-L252
[2] https://iceberg.apache.org/spec/#manifests
  • Loading branch information
andrwng committed Jul 17, 2024
1 parent e6d456c commit af65819
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions src/v/iceberg/tests/manifest_serialization_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

#include "base/units.h"
#include "bytes/iobuf.h"
#include "iceberg/avro_utils.h"
#include "iceberg/manifest_entry.h"
#include "iceberg/manifest_file.h"

#include <seastar/core/temporary_buffer.hh>

#include <avro/DataFile.hh>
#include <avro/Stream.hh>
#include <gtest/gtest.h>

Expand Down Expand Up @@ -101,3 +103,54 @@ TEST(ManifestSerializationTest, TestManifestFile) {
EXPECT_EQ(manifest.existing_rows_count, dmanifest.existing_rows_count);
EXPECT_EQ(manifest.deleted_rows_count, dmanifest.deleted_rows_count);
}

// Round-trips a manifest_file through an avro::DataFileWriter backed by an
// iobuf and reads it back with an avro::DataFileReader, checking that both
// the record fields and the user-supplied file metadata survive.
TEST(ManifestSerializationTest, TestManifestAvroReaderWriter) {
    const auto& manifest_file_schema = manifest_file::valid_schema();

    // Give every field a distinct value so that a swapped or dropped field
    // shows up as a mismatch below.
    manifest_file manifest;
    manifest.manifest_path = "path/to/file";
    manifest.partition_spec_id = 1;
    manifest.content = 2;
    manifest.sequence_number = 3;
    manifest.min_sequence_number = 4;
    manifest.added_snapshot_id = 5;
    manifest.added_data_files_count = 6;
    manifest.existing_data_files_count = 7;
    manifest.deleted_data_files_count = 8;
    manifest.added_rows_count = 9;
    manifest.existing_rows_count = 10;
    manifest.deleted_rows_count = 11;

    // Arbitrary key/value metadata handed to the writer alongside the schema.
    std::map<std::string, std::string> metadata;
    auto f1 = "{\"type\": \"dummyjson\"}";
    auto f2 = "2";
    metadata["f1"] = f1;
    metadata["f2"] = f2;

    // Serialize a single record into `buf`.
    iobuf buf;
    auto out = std::make_unique<avro_iobuf_ostream>(4_KiB, &buf);
    avro::DataFileWriter<manifest_file> writer(
      std::move(out), manifest_file_schema, 16_KiB, avro::NULL_CODEC, metadata);
    writer.write(manifest);
    writer.flush();

    // Deserialize from a copy of the written bytes.
    auto in = std::make_unique<avro_iobuf_istream>(buf.copy());
    avro::DataFileReader<manifest_file> reader(
      std::move(in), manifest_file_schema);
    manifest_file dmanifest;
    // read() reports whether a record was decoded; stop here on failure
    // rather than comparing against a default-constructed manifest.
    ASSERT_TRUE(reader.read(dmanifest));

    // Make sure each metadata entry is present before dereferencing it, so a
    // missing key is a test failure instead of a crash.
    const auto f1_meta = reader.getMetadata("f1");
    const auto f2_meta = reader.getMetadata("f2");
    ASSERT_TRUE(f1_meta);
    ASSERT_TRUE(f2_meta);
    EXPECT_STREQ(f1_meta->c_str(), f1);
    EXPECT_STREQ(f2_meta->c_str(), f2);

    EXPECT_EQ(manifest.manifest_path, dmanifest.manifest_path);
    EXPECT_EQ(manifest.partition_spec_id, dmanifest.partition_spec_id);
    EXPECT_EQ(manifest.content, dmanifest.content);
    EXPECT_EQ(manifest.sequence_number, dmanifest.sequence_number);
    EXPECT_EQ(manifest.min_sequence_number, dmanifest.min_sequence_number);
    EXPECT_EQ(manifest.added_snapshot_id, dmanifest.added_snapshot_id);
    EXPECT_EQ(
      manifest.added_data_files_count, dmanifest.added_data_files_count);
    EXPECT_EQ(
      manifest.existing_data_files_count, dmanifest.existing_data_files_count);
    EXPECT_EQ(
      manifest.deleted_data_files_count, dmanifest.deleted_data_files_count);
    EXPECT_EQ(manifest.added_rows_count, dmanifest.added_rows_count);
    EXPECT_EQ(manifest.existing_rows_count, dmanifest.existing_rows_count);
    EXPECT_EQ(manifest.deleted_rows_count, dmanifest.deleted_rows_count);
}

0 comments on commit af65819

Please sign in to comment.