Skip to content

Commit

Permalink
test(clp-s): Add end-to-end test case for compression and extraction. (
Browse files Browse the repository at this point in the history
  • Loading branch information
AVMatthews authored and Jack Luo committed Dec 4, 2024
1 parent a1fd272 commit 95d4425
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 1 deletion.
54 changes: 53 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
add_subdirectory(src/reducer)

set(SOURCE_FILES_clp_s_unitTest
src/clp_s/ArchiveReader.cpp
src/clp_s/ArchiveReader.hpp
src/clp_s/ArchiveWriter.cpp
src/clp_s/ArchiveWriter.hpp
src/clp_s/ColumnReader.cpp
src/clp_s/ColumnReader.hpp
src/clp_s/ColumnWriter.cpp
src/clp_s/ColumnWriter.hpp
src/clp_s/DictionaryEntry.cpp
src/clp_s/DictionaryEntry.hpp
src/clp_s/DictionaryWriter.cpp
src/clp_s/DictionaryWriter.hpp
src/clp_s/FileReader.cpp
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
src/clp_s/JsonFileIterator.hpp
src/clp_s/JsonParser.cpp
src/clp_s/JsonParser.hpp
src/clp_s/PackedStreamReader.cpp
src/clp_s/PackedStreamReader.hpp
src/clp_s/ReaderUtils.cpp
src/clp_s/ReaderUtils.hpp
src/clp_s/Schema.cpp
src/clp_s/Schema.hpp
src/clp_s/SchemaMap.cpp
src/clp_s/SchemaMap.hpp
src/clp_s/SchemaReader.cpp
src/clp_s/SchemaReader.hpp
src/clp_s/SchemaTree.cpp
src/clp_s/SchemaTree.hpp
src/clp_s/SchemaWriter.cpp
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AndExpr.cpp
src/clp_s/search/AndExpr.hpp
src/clp_s/search/BooleanLiteral.cpp
Expand Down Expand Up @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/search/StringLiteral.hpp
src/clp_s/search/Transformation.hpp
src/clp_s/search/Value.hpp
src/clp_s/SchemaTree.hpp
src/clp_s/TimestampDictionaryReader.cpp
src/clp_s/TimestampDictionaryReader.hpp
src/clp_s/TimestampDictionaryWriter.cpp
src/clp_s/TimestampDictionaryWriter.hpp
src/clp_s/TimestampEntry.cpp
src/clp_s/TimestampEntry.hpp
src/clp_s/TimestampPattern.cpp
src/clp_s/TimestampPattern.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
src/clp_s/ZstdDecompressor.hpp
)

set(SOURCE_FILES_unitTest
Expand Down Expand Up @@ -499,6 +548,7 @@ set(SOURCE_FILES_unitTest
tests/LogSuppressor.hpp
tests/test-Array.cpp
tests/test-BufferedFileReader.cpp
tests/test-clp_s-end_to_end.cpp
tests/test-EncodedVariableInterpreter.cpp
tests/test-encoding_methods.cpp
tests/test-ffi_IrUnitHandlerInterface.cpp
Expand Down Expand Up @@ -542,6 +592,8 @@ target_link_libraries(unitTest
log_surgeon::log_surgeon
LibArchive::LibArchive
MariaDBClient::MariaDBClient
${MONGOCXX_TARGET}
simdjson
spdlog::spdlog
OpenSSL::Crypto
${sqlite_LIBRARY_DEPENDENCIES}
Expand Down
158 changes: 158 additions & 0 deletions components/core/tests/test-clp_s-end_to_end.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#include <sys/wait.h>

#include <cstdlib>
#include <filesystem>
#include <string>
#include <string_view>
#include <vector>

#include <Catch2/single_include/catch2/catch.hpp>
#include <fmt/format.h>

#include "../src/clp_s/JsonConstructor.hpp"
#include "../src/clp_s/JsonParser.hpp"

constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"};
constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"};
constexpr std::string_view cTestEndToEndOutputSortedJson{"test-end-to-end_sorted.jsonl"};
constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"};
constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.jsonl"};

namespace {
/**
* A class that deletes the directories and files created by test cases, both before and after each
* test case where the class is instantiated.
*/
class TestOutputCleaner {
public:
TestOutputCleaner() { delete_files(); }

~TestOutputCleaner() { delete_files(); }

// Delete copy & move constructors and assignment operators
TestOutputCleaner(TestOutputCleaner const&) = delete;
TestOutputCleaner(TestOutputCleaner&&) = delete;
auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete;
auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete;

private:
static void delete_files() {
std::filesystem::remove_all(cTestEndToEndArchiveDirectory);
std::filesystem::remove_all(cTestEndToEndOutputDirectory);
std::filesystem::remove(cTestEndToEndOutputSortedJson);
}
};

auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path;
auto get_test_input_local_path() -> std::string;
void compress(bool structurize_arrays);
auto extract() -> std::filesystem::path;
void compare(std::filesystem::path const& extracted_json_path);

auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
return std::filesystem::path{cTestEndToEndInputFileDirectory} / cTestEndToEndInputFile;
}

auto get_test_input_local_path() -> std::string {
std::filesystem::path const current_file_path{__FILE__};
auto const tests_dir{current_file_path.parent_path()};
return (tests_dir / get_test_input_path_relative_to_tests_dir()).string();
}

void compress(bool structurize_arrays) {
constexpr auto cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024; // 8 GiB
constexpr auto cDefaultMaxDocumentSize = 512ULL * 1024 * 1024; // 512 MiB
constexpr auto cDefaultMinTableSize = 1ULL * 1024 * 1024; // 1 MiB
constexpr auto cDefaultCompressionLevel = 3;
constexpr auto cDefaultPrintArchiveStats = false;

std::filesystem::create_directory(cTestEndToEndArchiveDirectory);
REQUIRE((std::filesystem::is_directory(cTestEndToEndArchiveDirectory)));

clp_s::JsonParserOption parser_option{};
parser_option.file_paths.push_back(get_test_input_local_path());
parser_option.archives_dir = cTestEndToEndArchiveDirectory;
parser_option.target_encoded_size = cDefaultTargetEncodedSize;
parser_option.max_document_size = cDefaultMaxDocumentSize;
parser_option.min_table_size = cDefaultMinTableSize;
parser_option.compression_level = cDefaultCompressionLevel;
parser_option.print_archive_stats = cDefaultPrintArchiveStats;
parser_option.structurize_arrays = structurize_arrays;

clp_s::JsonParser parser{parser_option};
REQUIRE(parser.parse());
parser.store();

REQUIRE((false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory)));
}

auto extract() -> std::filesystem::path {
constexpr auto cDefaultOrdered = false;
constexpr auto cDefaultTargetOrderedChunkSize = 0;

std::filesystem::create_directory(cTestEndToEndOutputDirectory);
REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory));

clp_s::JsonConstructorOption constructor_option{};
constructor_option.archives_dir = cTestEndToEndArchiveDirectory;
constructor_option.output_dir = cTestEndToEndOutputDirectory;
constructor_option.ordered = cDefaultOrdered;
constructor_option.target_ordered_chunk_size = cDefaultTargetOrderedChunkSize;
for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
if (false == entry.is_directory()) {
// Skip non-directories
continue;
}

constructor_option.archive_id = entry.path().filename();
clp_s::JsonConstructor constructor{constructor_option};
constructor.store();
}
std::filesystem::path extracted_json_path{cTestEndToEndOutputDirectory};
extracted_json_path /= "original";
REQUIRE(std::filesystem::exists(extracted_json_path));

return extracted_json_path;
}

// Silence the checks below since our use of `std::system` is safe in the context of testing.
// NOLINTBEGIN(cert-env33-c,concurrency-mt-unsafe)
void compare(std::filesystem::path const& extracted_json_path) {
int result{std::system("command -v jq >/dev/null 2>&1")};
REQUIRE((0 == result));
auto command = fmt::format(
"jq --sort-keys --compact-output '.' {} | sort > {}",
extracted_json_path.string(),
cTestEndToEndOutputSortedJson
);
result = std::system(command.c_str());
REQUIRE((0 == result));

REQUIRE((false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson)));

result = std::system("command -v diff >/dev/null 2>&1");
REQUIRE((0 == result));
command = fmt::format(
"diff --unified {} {} > /dev/null",
cTestEndToEndOutputSortedJson,
get_test_input_local_path()
);
result = std::system(command.c_str());
REQUIRE((true == WIFEXITED(result)));
REQUIRE((0 == WEXITSTATUS(result)));
}

// NOLINTEND(cert-env33-c,concurrency-mt-unsafe)
} // namespace

TEST_CASE("clp-s-compress-extract-no-floats", "[clp-s][end-to-end]") {
auto structurize_arrays = GENERATE(true, false);

TestOutputCleaner const test_cleanup;

compress(structurize_arrays);

auto extracted_json_path = extract();

compare(extracted_json_path);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ set -u

dnf install -y \
cmake \
diffutils \
gcc-c++ \
git \
java-11-openjdk \
jq \
libarchive-devel \
libcurl-devel \
libzstd-devel \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
gcc \
gcc-10 \
git \
jq \
libcurl4 \
libcurl4-openssl-dev \
libmariadb-dev \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
curl \
build-essential \
git \
jq \
libboost-filesystem-dev \
libboost-iostreams-dev \
libboost-program-options-dev \
Expand Down

0 comments on commit 95d4425

Please sign in to comment.