From e44e0ad10e78ad1260906daec021f9eabdc6993e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 14 Feb 2024 17:02:16 +0100 Subject: [PATCH] Avoid excessive reallocation in row compressors (#6638) Memory operations can add up to tens of percent of the total compression CPU load. To reduce the need for them, reserve space for the expected array sizes when initializing the compressor. --- src/adts/bit_array.h | 2 +- src/adts/bit_array_impl.h | 4 ++-- src/adts/vec.h | 7 +++++-- test/src/adt_tests.c | 2 +- tsl/src/compression/gorilla.c | 14 ++++++++++++-- tsl/src/compression/simple8b_rle.h | 16 ++++++++++++++-- 6 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/adts/bit_array.h b/src/adts/bit_array.h index 47b7f7cd77c..506f5d15f89 100644 --- a/src/adts/bit_array.h +++ b/src/adts/bit_array.h @@ -25,7 +25,7 @@ typedef struct BitArray BitArray; typedef struct BitArrayIterator BitArrayIterator; /* Main Interface */ -static void bit_array_init(BitArray *array); +static void bit_array_init(BitArray *array, int expected_bits); /* Append num_bits to the array */ static void bit_array_append(BitArray *array, uint8 num_bits, uint64 bits); diff --git a/src/adts/bit_array_impl.h b/src/adts/bit_array_impl.h index 8de4c7e844f..02087030ab2 100644 --- a/src/adts/bit_array_impl.h +++ b/src/adts/bit_array_impl.h @@ -45,12 +45,12 @@ static inline void bit_array_wrap_internal(BitArray *array, uint32 num_buckets, ************************/ static inline void -bit_array_init(BitArray *array) +bit_array_init(BitArray *array, int expected_bits) { *array = (BitArray){ .bits_used_in_last_bucket = 0, }; - uint64_vec_init(&array->buckets, CurrentMemoryContext, 0); + uint64_vec_init(&array->buckets, CurrentMemoryContext, expected_bits / 64); } /* This initializes the bit array by wrapping buckets. 
Note, that the bit array will diff --git a/src/adts/vec.h b/src/adts/vec.h index 0d6427e8932..8e851d8cef4 100644 --- a/src/adts/vec.h +++ b/src/adts/vec.h @@ -110,8 +110,11 @@ VEC_RESERVE(VEC_TYPE *vec, uint32 additional) if (num_new_elements == 0 || vec->num_elements + num_new_elements <= vec->max_elements) return; - if (num_new_elements < vec->num_elements / 2) - num_new_elements = vec->num_elements / 2; + if (num_new_elements < vec->num_elements) + { + /* Follow the usual doubling progression of allocation sizes. */ + num_new_elements = vec->num_elements; + } num_elements = vec->num_elements + num_new_elements; Assert(num_elements > vec->num_elements); diff --git a/test/src/adt_tests.c b/test/src/adt_tests.c index 4cb56c7f7bf..03ec2769380 100644 --- a/test/src/adt_tests.c +++ b/test/src/adt_tests.c @@ -105,7 +105,7 @@ bit_array_test(void) BitArray bits; BitArrayIterator iter; int i; - bit_array_init(&bits); + bit_array_init(&bits, 0); for (i = 0; i < 65; i++) bit_array_append(&bits, i, i); diff --git a/tsl/src/compression/gorilla.c b/tsl/src/compression/gorilla.c index fc386138579..9e0a95d7622 100644 --- a/tsl/src/compression/gorilla.c +++ b/tsl/src/compression/gorilla.c @@ -254,9 +254,19 @@ gorilla_compressor_alloc(void) GorillaCompressor *compressor = palloc(sizeof(*compressor)); simple8brle_compressor_init(&compressor->tag0s); simple8brle_compressor_init(&compressor->tag1s); - bit_array_init(&compressor->leading_zeros); + /* + * The number of leading zeros takes about 5 bits to encode, and changes + * maybe every 100 rows, so use this as a conservative estimate. + */ + bit_array_init(&compressor->leading_zeros, + /* expected_bits = */ (GLOBAL_MAX_ROWS_PER_COMPRESSION * 5) / 100); simple8brle_compressor_init(&compressor->bits_used_per_xor); - bit_array_init(&compressor->xors); + /* + * We typically see about 12 bits or 4 decimal digits per row for the "xors" + * part in gorilla compression. 
+ */ + bit_array_init(&compressor->xors, + /* expected_bits = */ GLOBAL_MAX_ROWS_PER_COMPRESSION * 12); simple8brle_compressor_init(&compressor->nulls); compressor->has_nulls = false; compressor->prev_leading_zeroes = 0; diff --git a/tsl/src/compression/simple8b_rle.h b/tsl/src/compression/simple8b_rle.h index 04a6547d634..9ddd06b49a9 100644 --- a/tsl/src/compression/simple8b_rle.h +++ b/tsl/src/compression/simple8b_rle.h @@ -304,8 +304,20 @@ simple8brle_compressor_init(Simple8bRleCompressor *compressor) .num_elements = 0, .num_uncompressed_elements = 0, }; - uint64_vec_init(&compressor->compressed_data, CurrentMemoryContext, 0); - bit_array_init(&compressor->selectors); + /* + * It is good to have some estimate of the resulting size of compressed + * data, because it helps to allocate memory in advance to avoid frequent + * reallocations. Here we use a completely arbitrary but pretty realistic + * ratio of 10. + */ + const int expected_compression_ratio = 10; + uint64_vec_init(&compressor->compressed_data, + CurrentMemoryContext, + GLOBAL_MAX_ROWS_PER_COMPRESSION / expected_compression_ratio); + bit_array_init(&compressor->selectors, + /* expected_bits = */ (GLOBAL_MAX_ROWS_PER_COMPRESSION * + SIMPLE8B_BITS_PER_SELECTOR) / + expected_compression_ratio); } static void