From a7da00e86a4f4ed9cdbf438347d2df9ec5db1afe Mon Sep 17 00:00:00 2001 From: lujiashun Date: Thu, 27 Oct 2022 15:23:57 +0800 Subject: [PATCH] feat(tianmu): Improve the readbility of stonedb. (#11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [summary] 1 class member variable refactor:compress directory; 2 class member variable refactor:types directory; --- storage/tianmu/compress/arith_coder.cpp | 125 +++--- storage/tianmu/compress/arith_coder.h | 30 +- storage/tianmu/compress/basic_data_filt.cpp | 117 +++--- storage/tianmu/compress/basic_data_filt.h | 34 +- .../tianmu/compress/bit_stream_compressor.cpp | 2 +- storage/tianmu/compress/code_stream.h | 30 +- storage/tianmu/compress/data_filt.h | 6 +- storage/tianmu/compress/data_stream.cpp | 23 +- storage/tianmu/compress/data_stream.h | 145 ++++--- storage/tianmu/compress/defs.h | 20 +- storage/tianmu/compress/dictionary.cpp | 97 ++--- storage/tianmu/compress/dictionary.h | 87 ++-- storage/tianmu/compress/inc_alloc.cpp | 56 +-- storage/tianmu/compress/inc_alloc.h | 46 +-- storage/tianmu/compress/inc_wgraph.cpp | 210 +++++----- storage/tianmu/compress/inc_wgraph.h | 54 +-- storage/tianmu/compress/num_compressor.h | 82 ++-- storage/tianmu/compress/part_dict.h | 2 +- storage/tianmu/compress/ppm.cpp | 111 ++--- storage/tianmu/compress/ppm.h | 8 +- storage/tianmu/compress/ppm_defs.h | 6 +- storage/tianmu/compress/range_code.h | 168 ++++---- storage/tianmu/compress/suffix_tree.cpp | 256 ++++++------ storage/tianmu/compress/suffix_tree.h | 103 ++--- storage/tianmu/compress/text_compressor.cpp | 47 +-- storage/tianmu/compress/text_compressor.h | 41 +- storage/tianmu/compress/top_bit_dict.cpp | 87 ++-- storage/tianmu/compress/top_bit_dict.h | 30 +- storage/tianmu/compress/word_graph.cpp | 384 +++++++++--------- storage/tianmu/compress/word_graph.h | 96 ++--- storage/tianmu/core/aggregator_advanced.cpp | 8 +- storage/tianmu/core/aggregator_basic.cpp | 64 +-- 
storage/tianmu/core/cached_buffer.cpp | 6 +- storage/tianmu/core/column_bin_encoder.cpp | 58 +-- storage/tianmu/core/condition_encoder.cpp | 4 +- storage/tianmu/core/descriptor.cpp | 4 +- storage/tianmu/core/engine_convert.cpp | 22 +- storage/tianmu/core/just_a_table.cpp | 2 +- storage/tianmu/core/rc_attr.cpp | 20 +- storage/tianmu/core/rcattr_exeq_rs.cpp | 56 +-- storage/tianmu/core/rcattr_exqp.cpp | 12 +- storage/tianmu/core/rsi_bloom.cpp | 2 +- storage/tianmu/core/rsi_cmap.cpp | 10 +- storage/tianmu/core/value_or_null.cpp | 14 +- storage/tianmu/exporter/data_exporter_txt.cpp | 8 +- storage/tianmu/loader/load_parser.cpp | 4 +- storage/tianmu/loader/value_cache.cpp | 3 +- storage/tianmu/system/channel_out.h | 2 +- storage/tianmu/types/bstring.cpp | 297 ++++++++------ storage/tianmu/types/rc_data_types.h | 127 +++--- storage/tianmu/types/rc_datetime.cpp | 248 +++++------ storage/tianmu/types/rc_num.cpp | 31 +- storage/tianmu/types/rc_num.h | 2 +- storage/tianmu/types/rc_value_object.cpp | 54 +-- storage/tianmu/types/value_parser4txt.cpp | 75 ++-- storage/tianmu/vc/const_column.cpp | 2 +- storage/tianmu/vc/single_column.cpp | 6 +- 57 files changed, 1886 insertions(+), 1758 deletions(-) diff --git a/storage/tianmu/compress/arith_coder.cpp b/storage/tianmu/compress/arith_coder.cpp index 2fca691e6..300073413 100644 --- a/storage/tianmu/compress/arith_coder.cpp +++ b/storage/tianmu/compress/arith_coder.cpp @@ -23,38 +23,39 @@ namespace Tianmu { namespace compress { void ArithCoder::InitCompress() { - low = 0; - high = 0xffff; - underflow_bits = 0; + low_ = 0; + high_ = 0xffff; + underflow_bits_ = 0; } // if possible, make normalization and send bits to the 'dest' CprsErr ArithCoder::ScaleRange(BitStream *dest, BaseT s_low, BaseT s_high, BaseT total) { - range = (WideT)(high - low) + 1; - high = low + (BaseT)((range * s_high) / total - 1); - low = low + (BaseT)((range * s_low) / total); - if (high < low) return CprsErr::CPRS_ERR_SUM; + range_ = (WideT)(high_ - low_) + 1; 
+ high_ = low_ + (BaseT)((range_ * s_high) / total - 1); + low_ = low_ + (BaseT)((range_ * s_low) / total); + if (high_ < low_) + return CprsErr::CPRS_ERR_SUM; for (;;) { // the same MS bits - if ((high & 0x8000) == (low & 0x8000)) { - dest->PutBit(high >> 15); - while (underflow_bits > 0) { - dest->PutBit((~high & 0x8000) >> 15); - underflow_bits--; + if ((high_ & 0x8000) == (low_ & 0x8000)) { + dest->PutBit(high_ >> 15); + while (underflow_bits_ > 0) { + dest->PutBit((~high_ & 0x8000) >> 15); + underflow_bits_--; } } - // low=01... high=10... - else if ((low & 0x4000) && !(high & 0x4000)) { - underflow_bits++; - low &= 0x3fff; - high |= 0x4000; + // low_=01... high_=10... + else if ((low_ & 0x4000) && !(high_ & 0x4000)) { + underflow_bits_++; + low_ &= 0x3fff; + high_ |= 0x4000; } else return CprsErr::CPRS_SUCCESS; - low <<= 1; - high <<= 1; - high |= 1; + low_ <<= 1; + high_ <<= 1; + high_ |= 1; } } @@ -67,17 +68,17 @@ CprsErr ArithCoder::EncodeUniform(BitStream *dest, T val, T maxval, uint bitmax) // encode groups of 'uni_nbit' bits, from the least significant BaseT v; CprsErr err; - DEBUG_ASSERT(uni_total <= MAX_TOTAL); - while (bitmax > uni_nbit) { - v = (BaseT)(val & uni_mask); - err = ScaleRange(dest, v, v + (BaseT)1, uni_total); + DEBUG_ASSERT(uni_total_ <= MAX_TOTAL_); + while (bitmax > uni_nbit_) { + v = (BaseT)(val & uni_mask_); + err = ScaleRange(dest, v, v + (BaseT)1, uni_total_); if (static_cast(err)) return err; - val >>= uni_nbit; - maxval >>= uni_nbit; - bitmax -= uni_nbit; + val >>= uni_nbit_; + maxval >>= uni_nbit_; + bitmax -= uni_nbit_; } // encode the most significant group - DEBUG_ASSERT(maxval < MAX_TOTAL); + DEBUG_ASSERT(maxval < MAX_TOTAL_); err = ScaleRange(dest, (BaseT)val, (BaseT)val + (BaseT)1, (BaseT)maxval + (BaseT)1); if (static_cast(err)) return err; @@ -86,9 +87,9 @@ CprsErr ArithCoder::EncodeUniform(BitStream *dest, T val, T maxval, uint bitmax) // TODO: it was void ArithCoder::EndCompress(BitStream *dest) { - dest->PutBit((low 
& 0x4000) > 0); - underflow_bits++; - while (underflow_bits-- > 0) dest->PutBit(((~low) & 0x4000) > 0); + dest->PutBit((low_ & 0x4000) > 0); + underflow_bits_++; + while (underflow_bits_-- > 0) dest->PutBit(((~low_) & 0x4000) > 0); } CprsErr ArithCoder::CompressBytes(BitStream *dest, char *src, int slen, BaseT *sum, BaseT total) { @@ -102,7 +103,7 @@ CprsErr ArithCoder::CompressBytes(BitStream *dest, char *src, int slen, BaseT *s for (; slen > 0; slen--) { c = *(src++); err = ScaleRange(dest, sum[c], sum[c + 1], - total); // rescale high and low, send bits to dest + total); // rescale high_ and low_, send bits to dest if (static_cast(static_cast(err))) return err; } @@ -121,7 +122,7 @@ CprsErr ArithCoder::CompressBits(BitStream *dest, BitStream *src, BaseT *sum, Ba while (src->CanRead()) { c = src->GetBit(); err = ScaleRange(dest, sum[c], sum[c + 1], - total); // rescale high and low, send bits to dest + total); // rescale high_ and low_, send bits to dest if (static_cast(static_cast(err))) return err; } @@ -130,44 +131,44 @@ CprsErr ArithCoder::CompressBits(BitStream *dest, BitStream *src, BaseT *sum, Ba } void ArithCoder::InitDecompress(BitStream *src) { - low = 0; - high = 0xffff; - code = 0; - added = 0; + low_ = 0; + high_ = 0xffff; + code_ = 0; + added_ = 0; for (int i = 0; i < 16; i++) { - code <<= 1; + code_ <<= 1; if (src->CanRead()) - code |= src->GetBit(); + code_ |= src->GetBit(); else - added++; + added_++; } } // remove the symbol from the input CprsErr ArithCoder::RemoveSymbol(BitStream *src, BaseT s_low, BaseT s_high, BaseT total) { - high = low + (BaseT)((range * s_high) / total - 1); // TODO: optimize for decompression of bits - low = low + (BaseT)((range * s_low) / total); + high_ = low_ + (BaseT)((range_ * s_high) / total - 1); // TODO: optimize for decompression of bits + low_ = low_ + (BaseT)((range_ * s_low) / total); for (;;) { // the same MS bits - if ((high & 0x8000) == (low & 0x8000)) { + if ((high_ & 0x8000) == (low_ & 0x8000)) { } - // 
low=01... high=10... - else if ((low & 0x4000) && !(high & 0x4000)) { - code ^= 0x4000; - low &= 0x3fff; - high |= 0x4000; + // low_=01... high_=10... + else if ((low_ & 0x4000) && !(high_ & 0x4000)) { + code_ ^= 0x4000; + low_ &= 0x3fff; + high_ |= 0x4000; } else return CprsErr::CPRS_SUCCESS; - low <<= 1; - high <<= 1; - high |= 1; + low_ <<= 1; + high_ <<= 1; + high_ |= 1; - code <<= 1; + code_ <<= 1; if (src->CanRead()) - code |= src->GetBit(); - else if (++added > sizeof(BaseT) * 8) + code_ |= src->GetBit(); + else if (++added_ > sizeof(BaseT) * 8) return CprsErr::CPRS_ERR_BUF; } } @@ -181,20 +182,20 @@ CprsErr ArithCoder::DecodeUniform(BitStream *src, T &val, T maxval, uint bitmax) // decode groups of 'uni_nbit' bits, from the least significant BaseT v; CprsErr err; - DEBUG_ASSERT(uni_total <= MAX_TOTAL); + DEBUG_ASSERT(uni_total_ <= MAX_TOTAL_); uint shift = 0; - while (shift + uni_nbit < bitmax) { - v = GetCount(uni_total); - err = RemoveSymbol(src, v, v + (BaseT)1, uni_total); + while (shift + uni_nbit_ < bitmax) { + v = GetCount(uni_total_); + err = RemoveSymbol(src, v, v + (BaseT)1, uni_total_); if (static_cast(err)) return err; DEBUG_ASSERT(shift < 64); val |= (uint64_t)v << shift; - shift += uni_nbit; + shift += uni_nbit_; } // decode the most significant group BaseT total = (BaseT)(maxval _SHR_ shift) + (BaseT)1; - DEBUG_ASSERT(total <= MAX_TOTAL); + DEBUG_ASSERT(total <= MAX_TOTAL_); v = GetCount(total); err = RemoveSymbol(src, v, v + (BaseT)1, total); if (static_cast(err)) return err; @@ -260,8 +261,8 @@ CprsErr ArithCoder::DecompressBits(BitStream *dest, BitStream *src, BaseT *sum, } ArithCoder::BaseT ArithCoder::GetCount(BaseT total) { - range = (WideT)(high - low) + 1; - return (BaseT)((((WideT)(code - low) + 1) * total - 1) / range); + range_ = (WideT)(high_ - low_) + 1; + return (BaseT)((((WideT)(code_ - low_) + 1) * total - 1) / range_); } template diff --git a/storage/tianmu/compress/arith_coder.h b/storage/tianmu/compress/arith_coder.h index 
aa0775275..6e5354102 100644 --- a/storage/tianmu/compress/arith_coder.h +++ b/storage/tianmu/compress/arith_coder.h @@ -34,12 +34,12 @@ class ArithCoder { public: using BaseT = unsigned short; using WideT = unsigned long; - static const BaseT MAX_TOTAL = USHRT_MAX / 4; + static const BaseT MAX_TOTAL_ = USHRT_MAX / 4; // Saves encoded data to 'dest'. // sum - array of cumulated counts, starting with value 0. // Its size = highest_index_of_character_to_encode + 2 (1 for total) - // (contains 'low' for every character plus total), + // (contains 'low' for every character plus total), // e.g. for characters 0 and 1 it is: { low0, low1, total } // total - total sum of counts CprsErr CompressBytes(BitStream *dest, char *src, int slen, BaseT *sum, BaseT total); @@ -60,24 +60,24 @@ class ArithCoder { // num0, uint num1); private: - BaseT low; - BaseT high; - BaseT code; - WideT range; - int underflow_bits; - unsigned int added; // number of '0' bits virtually "added" to the source - // during decompr. (added > 16, means error) + BaseT low_; + BaseT high_; + BaseT code_; + WideT range_; + int underflow_bits_; + unsigned int added_; // number of '0' bits virtually "added" to the source + // during decompr. 
(added_ > 16, means error) // constants for uniform encoding: - static const uint uni_nbit = 13; - static const BaseT uni_mask = (1 << uni_nbit) - 1; - static const BaseT uni_total = 1 << uni_nbit; + static const uint uni_nbit_ = 13; + static const BaseT uni_mask_ = (1 << uni_nbit_) - 1; + static const BaseT uni_total_ = 1 << uni_nbit_; public: ArithCoder() { - low = high = code = 0; - range = 0; - underflow_bits = added = 0; + low_ = high_ = code_ = 0; + range_ = 0; + underflow_bits_ = added_ = 0; } ~ArithCoder() {} // compression methods diff --git a/storage/tianmu/compress/basic_data_filt.cpp b/storage/tianmu/compress/basic_data_filt.cpp index 920863d56..e0d85f07c 100644 --- a/storage/tianmu/compress/basic_data_filt.cpp +++ b/storage/tianmu/compress/basic_data_filt.cpp @@ -28,13 +28,13 @@ template bool DataFilt_RLE::Encode(RangeCoder *coder, DataSet *dataset) { T *data = dataset->data; uint nrec = dataset->nrec; - ASSERT(MAXBLEN < 65536, "should be 'MAXBLEN < 65536'"); + ASSERT(MAXBLEN_ < 65536, "should be 'MAXBLEN_ < 65536'"); // use RLE? 
- dict.InitInsert(); + dict_.InitInsert(); uint nrep = 0, nsamp = 0; for (uint i = 1; i < nrec; i += 5) - if (dict.Insert(data[i - 1])) { + if (dict_.Insert(data[i - 1])) { nsamp++; if (data[i] == data[i - 1]) nrep++; } else @@ -43,7 +43,7 @@ bool DataFilt_RLE::Encode(RangeCoder *coder, DataSet *dataset) { ASSERT(nsamp <= 65535, "should be 'nsamp <= 65535'"); short nkey; uint sum2 = 0; - auto keys = dict.GetKeys(nkey); + auto keys = dict_.GetKeys(nkey); for (short k = 0; k < nkey; k++) sum2 += keys[k].count * keys[k].count; if (nrep * nsamp < 5 * sum2) return false; @@ -52,30 +52,30 @@ bool DataFilt_RLE::Encode(RangeCoder *coder, DataSet *dataset) { ushort len = 1; T last = data[0]; for (uint i = 1; i < nrec; i++) - if ((data[i] == last) && (len < MAXBLEN)) + if ((data[i] == last) && (len < MAXBLEN_)) len++; else { AddLen(len); len = 1; - last = data[nblk] = data[i]; + last = data[nblk_] = data[i]; } AddLen(len); - dataset->nrec = nblk; + dataset->nrec = nblk_; // save version using 2 bits coder->EncodeUniShift((uchar)0, 2); // calculate and save cum counts // TODO: histogram alignment to power of 2 - lencnt[0] = 0; + lencnt_[0] = 0; uint bitmax = core::GetBitLen(nrec); - for (ushort i = 1; i <= MAXBLEN; i++) coder->EncodeUniShift(lencnt[i] += lencnt[i - 1], bitmax); - uint total = lencnt[MAXBLEN]; + for (ushort i = 1; i <= MAXBLEN_; i++) coder->EncodeUniShift(lencnt_[i] += lencnt_[i - 1], bitmax); + uint total = lencnt_[MAXBLEN_]; // encode block lengths - for (uint b = 0; b < nblk; b++) { - len = lens[b]; - coder->Encode(lencnt[len - 1], lencnt[len] - lencnt[len - 1], + for (uint b = 0; b < nblk_; b++) { + len = lens_[b]; + coder->Encode(lencnt_[len - 1], lencnt_[len] - lencnt_[len - 1], total); // TODO: EncodeShift } @@ -88,38 +88,39 @@ void DataFilt_RLE::Decode(RangeCoder *coder, DataSet *dataset) { if (ver > 0) throw CprsErr::CPRS_ERR_COR; // read cum counts - lencnt[0] = 0; - merge_nrec = dataset->nrec; - uint bitmax = core::GetBitLen(merge_nrec); - for 
(ushort i = 1; i <= MAXBLEN; i++) coder->DecodeUniShift(lencnt[i], bitmax); - uint total = lencnt[MAXBLEN]; + lencnt_[0] = 0; + merge_nrec_ = dataset->nrec; + uint bitmax = core::GetBitLen(merge_nrec_); + for (ushort i = 1; i <= MAXBLEN_; i++) coder->DecodeUniShift(lencnt_[i], bitmax); + uint total = lencnt_[MAXBLEN_]; // decode block lengths - nblk = 0; + nblk_ = 0; uint sumlen = 0; - while (sumlen < merge_nrec) { + while (sumlen < merge_nrec_) { uint c = coder->GetCount(total); ushort len = 1; - while (c >= lencnt[len]) len++; - coder->Decode(lencnt[len - 1], lencnt[len] - lencnt[len - 1], total); - sumlen += (lens[nblk++] = len); + while (c >= lencnt_[len]) len++; + coder->Decode(lencnt_[len - 1], lencnt_[len] - lencnt_[len - 1], total); + sumlen += (lens_[nblk_++] = len); } - if (sumlen > merge_nrec) throw CprsErr::CPRS_ERR_COR; - dataset->nrec = nblk; + if (sumlen > merge_nrec_) + throw CprsErr::CPRS_ERR_COR; + dataset->nrec = nblk_; } template void DataFilt_RLE::Merge(DataSet *dataset) { T *data = dataset->data; - uint nrec = merge_nrec; - nblk = dataset->nrec; - while (nblk > 0) { - T val = data[--nblk]; - for (ushort i = lens[nblk]; i > 0; i--) { + uint nrec = merge_nrec_; + nblk_ = dataset->nrec; + while (nblk_ > 0) { + T val = data[--nblk_]; + for (ushort i = lens_[nblk_]; i > 0; i--) { DEBUG_ASSERT(nrec > 0); data[--nrec] = val; } } - dataset->nrec = merge_nrec; + dataset->nrec = merge_nrec_; } template @@ -147,17 +148,18 @@ bool DataFilt_Min::Encode(RangeCoder *coder, DataSet *dataset) { } template void DataFilt_Min::Decode(RangeCoder *coder, DataSet *dataset) { - coder->DecodeUniform(minval, dataset->maxval); - if (minval == 0) throw CprsErr::CPRS_ERR_COR; - dataset->maxval -= minval; + coder->DecodeUniform(minval_, dataset->maxval); + if (minval_ == 0) + throw CprsErr::CPRS_ERR_COR; + dataset->maxval -= minval_; } template void DataFilt_Min::Merge(DataSet *dataset) { - ASSERT(minval > 0, "should be 'minval > 0'"); + ASSERT(minval_ > 0, "should be 
'minval_ > 0'"); T *data = dataset->data; uint nrec = dataset->nrec; - for (uint i = 0; i < nrec; i++) data[i] += minval; - dataset->maxval += minval; + for (uint i = 0; i < nrec; i++) data[i] += minval_; + dataset->maxval += minval_; } //-------------------------------------------------------------------------------------------- @@ -203,17 +205,18 @@ bool DataFilt_GCD::Encode(RangeCoder *coder, DataSet *dataset) { } template void DataFilt_GCD::Decode(RangeCoder *coder, DataSet *dataset) { - coder->DecodeUniform(gcd, dataset->maxval); - if (gcd <= 1) throw CprsErr::CPRS_ERR_COR; - dataset->maxval /= gcd; + coder->DecodeUniform(gcd_, dataset->maxval); + if (gcd_ <= 1) + throw CprsErr::CPRS_ERR_COR; + dataset->maxval /= gcd_; } template void DataFilt_GCD::Merge(DataSet *dataset) { - ASSERT(gcd > 1, "should be 'gcd > 1'"); + ASSERT(gcd_ > 1, "should be 'gcd_ > 1'"); T *data = dataset->data; uint nrec = dataset->nrec; - for (uint i = 0; i < nrec; i++) data[i] *= gcd; - dataset->maxval *= gcd; + for (uint i = 0; i < nrec; i++) data[i] *= gcd_; + dataset->maxval *= gcd_; } //-------------------------------------------------------------------------------------------- @@ -222,15 +225,15 @@ template double DataFilt_Diff::Entropy(T *data, uint nrec, [[maybe_unused]] uchar bitdict, uchar bitlow, bool top) { uchar mask = ((uchar)1 _SHL_ bitlow) - 1; ASSERT(bitlow < sizeof(T) * 8, "should be 'bitlow < sizeof(T)*8'"); - dict.InitInsert(); + dict_.InitInsert(); if (top) - for (uint i = 0; i < nrec; i++) dict.Insert((uchar)(data[i] _SHR_ bitlow)); + for (uint i = 0; i < nrec; i++) dict_.Insert((uchar)(data[i] _SHR_ bitlow)); else - for (uint i = 0; i < nrec; i++) dict.Insert((uchar)data[i] & mask); + for (uint i = 0; i < nrec; i++) dict_.Insert((uchar)data[i] & mask); double len = core::QuickMath::nlog2n(nrec); short nkey; - auto keys = dict.GetKeys(nkey); + auto keys = dict_.GetKeys(nkey); for (short k = 0; k < nkey; k++) len -= core::QuickMath::nlog2n(keys[k].count); return len; 
} @@ -238,7 +241,7 @@ template double DataFilt_Diff::Measure(DataSet *dataset, bool diff) { T *data = dataset->data; uint nrec = dataset->nrec; - uint nsamp = (nrec - 1 < MAXSAMP) ? nrec - 1 : MAXSAMP; + uint nsamp = (nrec - 1 < MAXSAMP_) ? nrec - 1 : MAXSAMP_; uint step = (nrec - 1) / nsamp; uint i = 1, j = 0; @@ -246,24 +249,26 @@ double DataFilt_Diff::Measure(DataSet *dataset, bool diff) { T maxval1 = dataset->maxval + 1; for (; j < nsamp; i += step, j++) { DEBUG_ASSERT(i < nrec); - if ((sample[j] = data[i] - data[i - 1]) > data[i]) sample[j] += maxval1; + if ((sample_[j] = data[i] - data[i - 1]) > data[i]) + sample_[j] += maxval1; } } else for (; j < nsamp; i += step, j++) { DEBUG_ASSERT(i < nrec); - sample[j] = data[i]; + sample_[j] = data[i]; } - ASSERT(j <= MAXSAMP, "should be 'j <= MAXSAMP'"); + ASSERT(j <= MAXSAMP_, "should be 'j <= MAXSAMP_'"); uint nbit = core::GetBitLen(dataset->maxval); - if (nbit <= BITDICT) return Entropy(sample, j, nbit, 0, true); + if (nbit <= BITDICT_) + return Entropy(sample_, j, nbit, 0, true); - double x = Entropy(sample, j, BITDICT, nbit - BITDICT, true); - if (nbit <= 2 * BITDICT) - x += Entropy(sample, j, nbit - BITDICT, nbit - BITDICT, false); + double x = Entropy(sample_, j, BITDICT_, nbit - BITDICT_, true); + if (nbit <= 2 * BITDICT_) + x += Entropy(sample_, j, nbit - BITDICT_, nbit - BITDICT_, false); else - x += Entropy(sample, j, BITDICT, BITDICT, false); + x += Entropy(sample_, j, BITDICT_, BITDICT_, false); return x; } diff --git a/storage/tianmu/compress/basic_data_filt.h b/storage/tianmu/compress/basic_data_filt.h index fa2515cb0..e195b6d09 100644 --- a/storage/tianmu/compress/basic_data_filt.h +++ b/storage/tianmu/compress/basic_data_filt.h @@ -34,20 +34,20 @@ namespace compress { /* RLE */ template class DataFilt_RLE : public DataFilt { - static const ushort MAXBLEN = 16; - ushort lens[CPRS_MAXREC]; - uint nblk; - uint lencnt[MAXBLEN + 1]; - uint merge_nrec; - Dictionary dict; + static const ushort MAXBLEN_ = 16; 
+ ushort lens_[CPRS_MAXREC]; + uint nblk_; + uint lencnt_[MAXBLEN_ + 1]; + uint merge_nrec_; + Dictionary dict_; void Clear() { - std::memset(lencnt, 0, sizeof(lencnt)); - nblk = 0; + std::memset(lencnt_, 0, sizeof(lencnt_)); + nblk_ = 0; } void AddLen(ushort len) { - lens[nblk++] = len; - lencnt[len]++; + lens_[nblk_++] = len; + lencnt_[len]++; } public: @@ -63,7 +63,7 @@ class DataFilt_RLE : public DataFilt { /* Subtracting minimum from data */ template class DataFilt_Min : public DataFilt { - T minval; + T minval_; public: char const *GetName() override { return "min"; } @@ -75,10 +75,10 @@ class DataFilt_Min : public DataFilt { /* Dividing data by GCD */ template class DataFilt_GCD : public DataFilt { - T gcd; + T gcd_; public: - char const *GetName() override { return "gcd"; } + char const *GetName() override { return "gcd"; } bool Encode(RangeCoder *coder, DataSet *dataset) override; void Decode(RangeCoder *coder, DataSet *dataset) override; void Merge(DataSet *dataset) override; @@ -87,10 +87,10 @@ class DataFilt_GCD : public DataFilt { /* Data differencing */ template class DataFilt_Diff : public DataFilt { - static const uint MAXSAMP = 65536 / 20; - static const uchar BITDICT = 8; - T sample[MAXSAMP]; - Dictionary dict; + static const uint MAXSAMP_ = 65536 / 20; + static const uchar BITDICT_ = 8; + T sample_[MAXSAMP_]; + Dictionary dict_; // T newmin, newmax, merge_maxval; // static int compare(const void* p1, const void* p2); // for // sorting array in increasing order diff --git a/storage/tianmu/compress/bit_stream_compressor.cpp b/storage/tianmu/compress/bit_stream_compressor.cpp index d54d5e118..cfb05e402 100644 --- a/storage/tianmu/compress/bit_stream_compressor.cpp +++ b/storage/tianmu/compress/bit_stream_compressor.cpp @@ -34,7 +34,7 @@ double BitstreamCompressor::Entropy(double p) { void BitstreamCompressor::GetSumTable(unsigned short *sum, unsigned int num0, unsigned int num1) { unsigned int cnt0 = num0, cnt1 = num1; - uint maxtotal = 
ArithCoder::MAX_TOTAL; + uint maxtotal = ArithCoder::MAX_TOTAL_; // reduce cnt0 and cnt1 so that their 'total' is small enough for the coder while (cnt0 + cnt1 > maxtotal) { diff --git a/storage/tianmu/compress/code_stream.h b/storage/tianmu/compress/code_stream.h index 89597f322..2f49a8963 100644 --- a/storage/tianmu/compress/code_stream.h +++ b/storage/tianmu/compress/code_stream.h @@ -26,61 +26,61 @@ namespace compress { // All the methods may throw exceptions of type CprsErr or ErrBufOverrun class CoderStream : protected ArithCoder { - BitStream my_stream; - BitStream *str; + BitStream my_stream_; + BitStream *str_; public: void Reset(char *buf, uint len, uint pos = 0) { - my_stream.Reset(buf, len, pos); - str = &my_stream; + my_stream_.Reset(buf, len, pos); + str_ = &my_stream_; } - void Reset(BitStream *str_ = 0) { str = str_; } + void Reset(BitStream *str = 0) { str_ = str; } CoderStream() { Reset(); } CoderStream(char *buf, uint len, uint pos = 0) { Reset(buf, len, pos); } CoderStream(BitStream *str_) { Reset(str_); } virtual ~CoderStream() {} using ArithCoder::BaseT; - using ArithCoder::MAX_TOTAL; + using ArithCoder::MAX_TOTAL_; // stream access methods - uint GetPos() { return str->GetPos(); } + uint GetPos() { return str_->GetPos(); } // compression methods void InitCompress() { ArithCoder::InitCompress(); } void Encode(BaseT low, BaseT high, BaseT total) { - CprsErr err = ArithCoder::ScaleRange(str, low, high, total); + CprsErr err = ArithCoder::ScaleRange(str_, low, high, total); if (static_cast(err)) throw err; } - void EndCompress() { ArithCoder::EndCompress(str); } + void EndCompress() { ArithCoder::EndCompress(str_); } // decompression methods - void InitDecompress() { ArithCoder::InitDecompress(str); } + void InitDecompress() { ArithCoder::InitDecompress(str_); } BaseT GetCount(BaseT total) { return ArithCoder::GetCount(total); } void Decode(BaseT low, BaseT high, BaseT total) { - CprsErr err = ArithCoder::RemoveSymbol(str, low, high, total); + 
CprsErr err = ArithCoder::RemoveSymbol(str_, low, high, total); if (static_cast(err)) throw err; } // uniform compression and decompression template void EncodeUniform(T val, T maxval, uint bitmax) { - CprsErr err = ArithCoder::EncodeUniform(str, val, maxval, bitmax); + CprsErr err = ArithCoder::EncodeUniform(str_, val, maxval, bitmax); if (static_cast(err)) throw err; } template void EncodeUniform(T val, T maxval) { - CprsErr err = ArithCoder::EncodeUniform(str, val, maxval); + CprsErr err = ArithCoder::EncodeUniform(str_, val, maxval); if (static_cast(err)) throw err; } template void DecodeUniform(T &val, T maxval, uint bitmax) { - CprsErr err = ArithCoder::DecodeUniform(str, val, maxval, bitmax); + CprsErr err = ArithCoder::DecodeUniform(str_, val, maxval, bitmax); if (static_cast(err)) throw err; } template void DecodeUniform(T &val, T maxval) { - CprsErr err = ArithCoder::DecodeUniform(str, val, maxval); + CprsErr err = ArithCoder::DecodeUniform(str_, val, maxval); if (static_cast(err)) throw err; } }; diff --git a/storage/tianmu/compress/data_filt.h b/storage/tianmu/compress/data_filt.h index 48b74735e..5ffcbd8d2 100644 --- a/storage/tianmu/compress/data_filt.h +++ b/storage/tianmu/compress/data_filt.h @@ -36,9 +36,9 @@ struct DataSet { /* Part of DataFilt not dependent on template type */ class DataFiltNoTemp { public: - uint codesize[2]; // size of encoded data: description part [0], data part - // [1] (in bytes) - virtual void ClearStats() { IFSTAT(codesize[0] = codesize[1] = 0); } + uint codesize_[2]; // size of encoded data: description part [0], data part + // [1] (in bytes) + virtual void ClearStats() { IFSTAT(codesize_[0] = codesize_[1] = 0); } virtual char const *GetName() = 0; virtual ~DataFiltNoTemp() {} }; diff --git a/storage/tianmu/compress/data_stream.cpp b/storage/tianmu/compress/data_stream.cpp index ceb4120cd..13805504e 100644 --- a/storage/tianmu/compress/data_stream.cpp +++ b/storage/tianmu/compress/data_stream.cpp @@ -24,27 +24,30 @@ void 
BitStream::ZeroBits(uint beg, uint end) { uint byte1 = beg >> 3, bit1 = 8 - (beg & 7), byte2 = end >> 3, bit2 = end & 7; if (byte1 < byte2) { - (buf[byte1] _SHL_ASSIGN_ bit1) _SHR_ASSIGN_ bit1; // clear 'bit1' upper bits of 'byte1' - if (bit2) (buf[byte2] >>= bit2) <<= bit2; // clear 'bit2' lower bits of 'byte2' - std::memset(buf + byte1 + 1, 0, + (buf_[byte1] _SHL_ASSIGN_ bit1) _SHR_ASSIGN_ bit1; // clear 'bit1' upper bits of 'byte1' + if (bit2) + (buf_[byte2] >>= bit2) <<= bit2; // clear 'bit2' lower bits of 'byte2' + std::memset(buf_ + byte1 + 1, 0, byte2 - byte1 - 1); // clear the rest of bytes, in the middle } else if (beg < end) { // beggining and end are in the same byte - uchar t = buf[byte1]; + uchar t = buf_[byte1]; bit1 = 8 - bit1; bit2 = 8 - bit2; t _SHR_ASSIGN_ bit1; t _SHL_ASSIGN_(bit1 + bit2); t >>= bit2; - buf[byte1] -= t; + buf_[byte1] -= t; } } void BitStream::ClearBits() { - clrlen = 2 * pos + 64; - clrlen = (clrlen / 8) * 8; // round to the whole byte - if ((clrlen <= pos) || (clrlen > len)) clrlen = len; - if (clrlen <= pos) BufOverrun(); - ZeroBits(pos, clrlen); + clrlen_ = 2 * pos_ + 64; + clrlen_ = (clrlen_ / 8) * 8; // round to the whole byte + if ((clrlen_ <= pos_) || (clrlen_ > len_)) + clrlen_ = len_; + if (clrlen_ <= pos_) + BufOverrun(); + ZeroBits(pos_, clrlen_); } } // namespace compress diff --git a/storage/tianmu/compress/data_stream.h b/storage/tianmu/compress/data_stream.h index afe600994..4d6ca978f 100644 --- a/storage/tianmu/compress/data_stream.h +++ b/storage/tianmu/compress/data_stream.h @@ -31,35 +31,35 @@ namespace compress { // The bits in a byte are counted from the Least Signif. to the MS. class DataStream { protected: - uchar *buf; - uint len, // length of 'buf', in bits - pos; // number of bits of buf already read/wrote, counting from the beg. - // of buf (0 <= pos < len) + uchar *buf_; + uint len_, // length of 'buf_', in bits + pos_; // number of bits of buf_ already read/wrote, counting from the beg. 
+ // of buf_ (0 <= pos_ < len_) virtual void BufOverrun() { throw ErrBufOverrun(); } public: using uint64_t = unsigned long long; - void Reset() { pos = 0; } + void Reset() { pos_ = 0; } - void Reset(uint len_, uint pos_ = 0) { - len = len_; - pos = pos_; + void Reset(uint len, uint pos = 0) { + len_ = len; + pos_ = pos; } - void Reset(char *buf_, uint len_, uint pos_ = 0) { - buf = (uchar *)buf_; - len = len_; - pos = pos_; + void Reset(char *buf, uint len, uint pos = 0) { + buf_ = (uchar *)buf; + len_ = len; + pos_ = pos; } - DataStream() : buf(NULL) { Reset(0, 0, 0); } - DataStream(char *buf_, uint len_, uint pos_ = 0) : buf(NULL) { Reset(buf_, len_, pos_); } + DataStream() : buf_(NULL) { Reset(0, 0, 0); } + DataStream(char *buf, uint len, uint pos = 0) : buf_(NULL) { Reset(buf, len, pos); } virtual ~DataStream() {} - char *GetBuf() { return (char *)buf; } - uint GetPos() { return pos; } - void SetPos(uint pos_) { pos = pos_; } - bool CanRead() { return pos < len; } - bool CanWrite() { return pos < len; } + char *GetBuf() { return (char *)buf_; } + uint GetPos() { return pos_; } + void SetPos(uint pos) { pos_ = pos; } + bool CanRead() { return pos_ < len_; } + bool CanWrite() { return pos_ < len_; } virtual uchar Get() = 0; virtual void Put(uchar v) = 0; }; @@ -69,13 +69,13 @@ class DataStream { // During writing, some more bits than is needed can be cleared in advance - due // to efficiency reasons. class BitStream : public DataStream { - uint clrlen; // index of the last cleared bit of 'buf' plus 1 (0 - no bit - // cleared yet) + uint clrlen_; // index of the last cleared bit of 'buf_' plus 1 (0 - no bit + // cleared yet) - // clear bits of 'buf', from bit no. 'beg' to 'end'-1; + // clear bits of 'buf_', from bit no. 
'beg' to 'end'-1; // there is no checking of buffer overrun void ZeroBits(uint beg, uint end); - // clear some number of bits in advance, beggining from 'pos', to allow faster + // clear some number of bits in advance, beggining from 'pos_', to allow faster // Put operations void ClearBits(); @@ -83,15 +83,15 @@ class BitStream : public DataStream { // len_, pos_ - numbers of BITS void Reset() { DataStream::Reset(); - clrlen = 0; + clrlen_ = 0; } void Reset(uint len_, uint pos_ = 0) { DataStream::Reset(len_, pos_); - clrlen = 0; + clrlen_ = 0; } void Reset(char *buf_, uint len_, uint pos_ = 0) { DataStream::Reset(buf_, len_, pos_); - clrlen = 0; + clrlen_ = 0; } // len_, pos_ - numbers of BITS @@ -100,11 +100,11 @@ class BitStream : public DataStream { virtual ~BitStream() {} uchar NextBit(); // show the next bit, but don't remove it from the stream uchar NextByte(); // show the next byte, but don't remove it from the stream - void SkipBit(uint skip = 1); // advances 'pos', checks for buffer overflow + void SkipBit(uint skip = 1); // advances 'pos_', checks for buffer overflow // these two methods are NOT virtual, so can be used directly to slightly // increase efficiency - uchar GetBit(); // returns the next bit in the Least Signif Bit; advances 'pos' + uchar GetBit(); // returns the next bit in the Least Signif Bit; advances 'pos_' void PutBit(uchar b); // appends the Least Signif. 
Bit of 'b' to the data void PutBit0(); void PutBit1(); @@ -119,7 +119,7 @@ class BitStream : public DataStream { void FastPutBlock(char *data, uint n); // append 'n' bytes of 'data', starting at the - // byte boundary in 'buf' + // byte boundary in 'buf_' void FastGetBlock(char *data, uint n); // read 'n' bytes to 'data' uchar Get() override { return GetBit(); } @@ -127,65 +127,74 @@ class BitStream : public DataStream { }; inline uchar BitStream::NextBit() { - if (pos >= len) BufOverrun(); - return (buf[pos >> 3] >> (pos & 7)) & 1; + if (pos_ >= len_) + BufOverrun(); + return (buf_[pos_ >> 3] >> (pos_ & 7)) & 1; } inline uchar BitStream::NextByte() { - if (pos + 7 >= len) BufOverrun(); + if (pos_ + 7 >= len_) + BufOverrun(); unsigned char result = 0; - result |= (buf[pos >> 3] >> pos % 8); - result |= (buf[(pos >> 3) + 1] _SHL_(8 - pos % 8)); + result |= (buf_[pos_ >> 3] >> pos_ % 8); + result |= (buf_[(pos_ >> 3) + 1] _SHL_(8 - pos_ % 8)); return result; } inline void BitStream::SkipBit(uint skip) { - if ((pos > pos + skip) || ((pos += skip) > len)) BufOverrun(); + if ((pos_ > pos_ + skip) || ((pos_ += skip) > len_)) + BufOverrun(); } inline uchar BitStream::GetBit() { - if (pos >= len) BufOverrun(); - uchar result = (buf[pos >> 3] >> (pos & 7)) & 1; - pos++; + if (pos_ >= len_) + BufOverrun(); + uchar result = (buf_[pos_ >> 3] >> (pos_ & 7)) & 1; + pos_++; return result; } inline void BitStream::PutBit(uchar b) { - if (pos >= clrlen) ClearBits(); - buf[pos >> 3] |= ((b & 1) << (pos & 7)); - pos++; + if (pos_ >= clrlen_) + ClearBits(); + buf_[pos_ >> 3] |= ((b & 1) << (pos_ & 7)); + pos_++; } inline void BitStream::PutBit0() { - if (pos >= clrlen) ClearBits(); - pos++; + if (pos_ >= clrlen_) + ClearBits(); + pos_++; } inline void BitStream::PutBit1() { - if (pos >= clrlen) ClearBits(); - buf[pos >> 3] |= (1 << (pos & 7)); - pos++; + if (pos_ >= clrlen_) + ClearBits(); + buf_[pos_ >> 3] |= (1 << (pos_ & 7)); + pos_++; } inline void BitStream::PutByte(uchar b) { - 
if (pos + 7 >= clrlen) ClearBits(); - if (pos & 7) { - buf[pos >> 3] |= (b << (pos & 7)); - buf[(pos >> 3) + 1] |= (b _SHR_(8 - (pos & 7))); + if (pos_ + 7 >= clrlen_) + ClearBits(); + if (pos_ & 7) { + buf_[pos_ >> 3] |= (b << (pos_ & 7)); + buf_[(pos_ >> 3) + 1] |= (b _SHR_(8 - (pos_ & 7))); } else - buf[pos >> 3] = b; - pos += 8; + buf_[pos_ >> 3] = b; + pos_ += 8; } inline uchar BitStream::GetByte() { - if (pos + 7 >= len) BufOverrun(); + if (pos_ + 7 >= len_) + BufOverrun(); uchar result = 0; - if (pos & 7) { - result |= (buf[pos >> 3] >> (pos & 7)); - result |= (buf[(pos >> 3) + 1] _SHL_(8 - (pos & 7))); + if (pos_ & 7) { + result |= (buf_[pos_ >> 3] >> (pos_ & 7)); + result |= (buf_[(pos_ >> 3) + 1] _SHL_(8 - (pos_ & 7))); } else - result = buf[pos >> 3]; - pos += 8; + result = buf_[pos_ >> 3]; + pos_ += 8; return result; } @@ -222,19 +231,21 @@ inline unsigned long long BitStream::GetUInt64(unsigned short int no_bits) { } inline void BitStream::FastPutBlock(char *data, uint n) { - pos = ((pos + 7) / 8) * 8; // round 'pos' up to the byte boundary - uint pos1 = pos + 8 * n; - if ((pos > pos1) || (pos1 > len)) BufOverrun(); - std::memcpy(buf + pos / 8, data, n); - pos = pos1; + pos_ = ((pos_ + 7) / 8) * 8; // round 'pos_' up to the byte boundary + uint pos1 = pos_ + 8 * n; + if ((pos_ > pos1) || (pos1 > len_)) + BufOverrun(); + std::memcpy(buf_ + pos_ / 8, data, n); + pos_ = pos1; } inline void BitStream::FastGetBlock(char *data, uint n) { - pos = ((pos + 7) / 8) * 8; // round 'pos' up to the byte boundary - uint pos1 = pos + 8 * n; - if ((pos > pos1) || (pos1 > len)) BufOverrun(); - std::memcpy(data, buf + pos / 8, n); - pos = pos1; + pos_ = ((pos_ + 7) / 8) * 8; // round 'pos_' up to the byte boundary + uint pos1 = pos_ + 8 * n; + if ((pos_ > pos1) || (pos1 > len_)) + BufOverrun(); + std::memcpy(data, buf_ + pos_ / 8, n); + pos_ = pos1; } } // namespace compress diff --git a/storage/tianmu/compress/defs.h b/storage/tianmu/compress/defs.h index 
cc311d394..387369b89 100644 --- a/storage/tianmu/compress/defs.h +++ b/storage/tianmu/compress/defs.h @@ -53,40 +53,40 @@ namespace compress { class _SHIFT_CHECK_ { public: - unsigned long long v; - explicit _SHIFT_CHECK_(unsigned long long a) : v(a) {} + unsigned long long v_; + explicit _SHIFT_CHECK_(unsigned long long a) : v_(a) {} }; template inline T operator>>(T a, _SHIFT_CHECK_ b) { - if (b.v >= sizeof(T) * 8) { + if (b.v_ >= sizeof(T) * 8) { return 0; } - return a >> b.v; + return a >> b.v_; } template inline T &operator>>=(T &a, _SHIFT_CHECK_ b) { - if (b.v >= sizeof(T) * 8) { + if (b.v_ >= sizeof(T) * 8) { a = (T)0; return a; } - return a >>= b.v; + return a >>= b.v_; } template inline T operator<<(T a, _SHIFT_CHECK_ b) { - if (b.v >= sizeof(T) * 8) { + if (b.v_ >= sizeof(T) * 8) { return 0; } - return a << b.v; + return a << b.v_; } template inline T &operator<<=(T &a, _SHIFT_CHECK_ b) { - if (b.v >= sizeof(T) * 8) { + if (b.v_ >= sizeof(T) * 8) { a = (T)0; return a; } - return a <<= b.v; + return a <<= b.v_; } #ifndef _SHR_ diff --git a/storage/tianmu/compress/dictionary.cpp b/storage/tianmu/compress/dictionary.cpp index 32ddfd6e8..03ca77927 100644 --- a/storage/tianmu/compress/dictionary.cpp +++ b/storage/tianmu/compress/dictionary.cpp @@ -27,15 +27,15 @@ namespace compress { template void Dictionary::Clear() { - static_assert(MAXTOTAL > MAXKEYS + 1, "should be 'MAXTOTAL > MAXKEYS+1'"); - nkeys = 0; - std::memset(buckets, -1, sizeof(buckets)); - compress = decompress = false; + static_assert(MAX_TOTAL_ > MAX_KEYS_ + 1, "should be 'MAX_TOTAL_ > MAX_KEYS_+1'"); + n_keys_ = 0; + std::memset(buckets_, -1, sizeof(buckets_)); + compress_ = decompress_ = false; } template Dictionary::Dictionary() { - static_assert(MAXKEYS < SHRT_MAX, "should be 'MAXKEYS < SHRT_MAX'"); + static_assert(MAX_KEYS_ < SHRT_MAX, "should be 'MAX_KEYS_ < SHRT_MAX'"); Clear(); } @@ -43,43 +43,43 @@ Dictionary::Dictionary() { template void Dictionary::SetLows() { - // sort keys by 
descending 'count' + // sort keys_ by descending 'count' uint sumcnt = 0, total = 0; - for (short i = 0; i < nkeys; i++) { - order[i] = &keys[i]; - sumcnt += keys[i].count; - DEBUG_ASSERT(keys[i].count > 0); + for (short i = 0; i < n_keys_; i++) { + order_[i] = &keys_[i]; + sumcnt += keys_[i].count; + DEBUG_ASSERT(keys_[i].count > 0); } - qsort_tianmu(order, nkeys, sizeof(*order), compare); + qsort_tianmu(order_, n_keys_, sizeof(*order_), compare); - ASSERT(sumcnt <= MAXTOTAL, "should be 'sumcnt <= MAXTOTAL'"); + ASSERT(sumcnt <= MAX_TOTAL_, "should be 'sumcnt <= MAX_TOTAL_'"); // set short counts - // if(sumcnt > MAXTOTAL) { + // if(sumcnt > MAX_TOTAL_) { // DEBUG_ASSERT(0); - // uint shift = GetShift(sumcnt, MAXTOTAL - nkeys); - // for(short i = 0; i < nkeys; i++) { - // if((order[i]->count _SHR_ASSIGN_ shift) == 0) - // order[i]->count = 1; total += order[i]->count; + // uint shift = GetShift(sumcnt, MAX_TOTAL_ - n_keys_); + // for(short i = 0; i < n_keys_; i++) { + // if((order_[i]->count _SHR_ASSIGN_ shift) == 0) + // order_[i]->count = 1; total += order_[i]->count; // } // } // else total = sumcnt; total = sumcnt; - tot_shift = core::GetBitLen(total - 1); - ASSERT((total <= MAXTOTAL) && (total > 0) && (1u _SHL_ tot_shift) >= total, - "should be '(total <= MAXTOTAL) && (total > 0) && (1u _SHL_ " - "tot_shift) >= total'"); + tot_shift_ = core::GetBitLen(total - 1); + ASSERT((total <= MAX_TOTAL_) && (total > 0) && (1u _SHL_ tot_shift_) >= total, + "should be '(total <= MAX_TOTAL_) && (total > 0) && (1u _SHL_ " + "tot_shift_) >= total'"); // correct counts to sum up to power of 2; set lows - uint high = (1u _SHL_ tot_shift), rest = high - total, d; - for (short i = nkeys; i > 0;) { + uint high = (1u _SHL_ tot_shift_), rest = high - total, d; + for (short i = n_keys_; i > 0;) { rest -= (d = rest / i--); - order[i]->low = (high -= (order[i]->count += d)); + order_[i]->low = (high -= (order_[i]->count += d)); } ASSERT(high == 0, "should be 'high == 0'"); - compress 
= true; + compress_ = true; } template @@ -95,49 +95,50 @@ int Dictionary::compare(const void *p1, const void *p2) { template void Dictionary::Save(RangeCoder *dest, T maxkey) { - ASSERT(compress, "'compress' should be true"); + ASSERT(compress_, "'compress_' should be true"); - // save no. of keys - dest->EncodeUniform(nkeys, (short)MAXKEYS); + // save no. of keys_ + dest->EncodeUniform(n_keys_, (short)MAX_KEYS_); uint bitmax = core::GetBitLen(maxkey); - uint c, prevc = MAXTOTAL - 1; - for (short i = 0; i < nkeys; i++) { + uint c, prevc = MAX_TOTAL_ - 1; + for (short i = 0; i < n_keys_; i++) { // save the key and its short count-1 (which is not greater than the // previous count-1) - dest->EncodeUniform(order[i]->key, maxkey, bitmax); - dest->EncodeUniform(c = order[i]->count - 1, prevc); + dest->EncodeUniform(order_[i]->key, maxkey, bitmax); + dest->EncodeUniform(c = order_[i]->count - 1, prevc); prevc = c; } } template void Dictionary::Load(RangeCoder *src, T maxkey) { - compress = decompress = false; + compress_ = decompress_ = false; - // load no. of keys - src->DecodeUniform(nkeys, (short)MAXKEYS); + // load no. 
of keys_ + src->DecodeUniform(n_keys_, (short)MAX_KEYS_); - // load keys, their 'lows' and 'highs'; fill 'cnt2val' array + // load keys_, their 'lows' and 'highs'; fill 'cnt2val_' array uint bitmax = core::GetBitLen(maxkey); - uint c, prevc = MAXTOTAL - 1; + uint c, prevc = MAX_TOTAL_ - 1; uint total = 0; - for (short i = 0; i < nkeys; i++) { - src->DecodeUniform(keys[i].key, maxkey, bitmax); + for (short i = 0; i < n_keys_; i++) { + src->DecodeUniform(keys_[i].key, maxkey, bitmax); src->DecodeUniform(c, prevc); prevc = c++; - keys[i].count = c; - keys[i].low = total; - if (total + c > MAXTOTAL) throw CprsErr::CPRS_ERR_COR; - for (; c > 0; c--) cnt2val[total++] = i; + keys_[i].count = c; + keys_[i].low = total; + if (total + c > MAX_TOTAL_) + throw CprsErr::CPRS_ERR_COR; + for (; c > 0; c--) cnt2val_[total++] = i; } - tot_shift = core::GetBitLen(total - 1); - ASSERT((total <= MAXTOTAL) && (total > 0) && (1u _SHL_ tot_shift) >= total, - "should be '(total <= MAXTOTAL) && (total > 0) && (1u _SHL_ " - "tot_shift) >= total'"); + tot_shift_ = core::GetBitLen(total - 1); + ASSERT((total <= MAX_TOTAL_) && (total > 0) && (1u _SHL_ tot_shift_) >= total, + "should be '(total <= MAX_TOTAL_) && (total > 0) && (1u _SHL_ " + "tot_shift_) >= total'"); - decompress = true; + decompress_ = true; } template class Dictionary; diff --git a/storage/tianmu/compress/dictionary.h b/storage/tianmu/compress/dictionary.h index 67a17371e..975781511 100644 --- a/storage/tianmu/compress/dictionary.h +++ b/storage/tianmu/compress/dictionary.h @@ -27,19 +27,19 @@ namespace compress { template struct Dictionary_Helper { - static constexpr size_t MAXKEYS = 4096; - static constexpr size_t NBUCK = 65536; + static constexpr size_t MAX_KEYS_ = 4096; + static constexpr size_t N_BUCK_ = 65536; }; template <> struct Dictionary_Helper { - static constexpr size_t MAXKEYS = 256; - static constexpr size_t NBUCK = 256; + static constexpr size_t MAX_KEYS_ = 256; + static constexpr size_t N_BUCK_ = 256; }; 
template <> struct Dictionary_Helper { - static constexpr size_t MAXKEYS = 1024; - static constexpr size_t NBUCK = 65536; + static constexpr size_t MAX_KEYS_ = 1024; + static constexpr size_t N_BUCK_ = 65536; }; // Data structure which holds a dictionary of numerical values used for @@ -51,34 +51,34 @@ struct Dictionary_Helper { template class Dictionary final { public: - static constexpr uint MAXTOTAL = RangeCoder::MAX_TOTAL; - static constexpr ushort MAXKEYS = Dictionary_Helper::MAXKEYS; + static constexpr uint MAX_TOTAL_ = RangeCoder::MAX_TOTAL_; + static constexpr ushort MAX_KEYS_ = Dictionary_Helper::MAX_KEYS_; struct KeyRange { T key; uint count; - uint low; // lows are set when all keys are inserted + uint low; // lows are set when all keys_ are inserted }; private: - KeyRange keys[MAXKEYS]; - short nkeys; + KeyRange keys_[MAX_KEYS_]; + short n_keys_; - // Hash table to index 'keys' array according to the 'key' field - short buckets[Dictionary_Helper::NBUCK]; // indices into 'keys'; -1 means - // empty bucket - short next[MAXKEYS]; // next[k] is the next element in a bucket after key no. - // k, or -1 - KeyRange *order[MAXKEYS]; + // Hash table to index 'keys_' array according to the 'key' field + short buckets_[Dictionary_Helper::N_BUCK_]; // indices into 'keys_'; -1 means + // empty bucket + short next_[MAX_KEYS_]; // next_[k] is the next_ element in a bucket after key no. 
+ // k, or -1 + KeyRange *order_[MAX_KEYS_]; // For decompression - short cnt2val[MAXTOTAL]; // cnt2val[c] is an index of the key for cumulative - // count 'c' - uint tot_shift; // total = 1 << tot_shift + short cnt2val_[MAX_TOTAL_]; // cnt2val_[c] is an index of the key for cumulative + // count 'c' + uint tot_shift_; // total = 1 << tot_shift_ static int compare(const void *p1, - const void *p2); // for sorting keys by descending 'count' - bool compress, decompress; // says if internal structures are set to perform + const void *p2); // for sorting keys_ by descending 'count' + bool compress_, decompress_; // says if internal structures are set to perform // compression or decompression void Clear(); uint hash(T key) { return (ushort)key; } @@ -90,28 +90,29 @@ class Dictionary final { // Insert(): if 'key' is already in dictionary, increase its count by 'count'. // Otherwise insert the key and set count to 'count'. void InitInsert() { Clear(); } - // returns false if too many keys + // returns false if too many keys_ bool Insert(T key, uint count = 1) { uint b = hash(key); - short k = buckets[b]; - while ((k >= 0) && (keys[k].key != key)) k = next[k]; + short k = buckets_[b]; + while ((k >= 0) && (keys_[k].key != key)) k = next_[k]; if (k < 0) { - if (nkeys >= MAXKEYS) return false; - keys[nkeys].key = key; - keys[nkeys].count = count; - next[nkeys] = buckets[b]; // TODO: time - insert new keys at the END of the list - buckets[b] = nkeys++; + if (n_keys_ >= MAX_KEYS_) + return false; + keys_[n_keys_].key = key; + keys_[n_keys_].count = count; + next_[n_keys_] = buckets_[b]; // TODO: time - insert new keys_ at the END of the list + buckets_[b] = n_keys_++; } else - keys[k].count += count; + keys_[k].count += count; return true; } KeyRange *GetKeys(short &n) { - n = nkeys; - return keys; + n = n_keys_; + return keys_; } - void SetLows(); // set lows/highs of keys + void SetLows(); // set lows/highs of keys_ void Save(RangeCoder *dest, T maxkey); // maxkey - the 
largest key or something bigger @@ -120,23 +121,23 @@ class Dictionary final { // returns true when ESC was encoded ('key' is not in dictionary) bool Encode(RangeCoder *dest, T key) { - DEBUG_ASSERT(compress); + DEBUG_ASSERT(compress_); // find the 'key' in the hash uint b = hash(key); - short k = buckets[b]; - while ((k >= 0) && (keys[k].key != key)) k = next[k]; + short k = buckets_[b]; + while ((k >= 0) && (keys_[k].key != key)) k = next_[k]; DEBUG_ASSERT(k >= 0); // TODO: handle ESC encoding - dest->EncodeShift(keys[k].low, keys[k].count, tot_shift); + dest->EncodeShift(keys_[k].low, keys_[k].count, tot_shift_); return false; } bool Decode(RangeCoder *src, T &key) { - DEBUG_ASSERT(decompress); - uint count = src->GetCountShift(tot_shift); - short k = cnt2val[count]; // TODO: handle ESC decoding - key = keys[k].key; - src->DecodeShift(keys[k].low, keys[k].count, tot_shift); + DEBUG_ASSERT(decompress_); + uint count = src->GetCountShift(tot_shift_); + short k = cnt2val_[count]; // TODO: handle ESC decoding + key = keys_[k].key; + src->DecodeShift(keys_[k].low, keys_[k].count, tot_shift_); return false; } }; diff --git a/storage/tianmu/compress/inc_alloc.cpp b/storage/tianmu/compress/inc_alloc.cpp index fe5b759f7..5b4845512 100644 --- a/storage/tianmu/compress/inc_alloc.cpp +++ b/storage/tianmu/compress/inc_alloc.cpp @@ -21,56 +21,56 @@ namespace Tianmu { namespace compress { IncAlloc::IncAlloc(uint fsize) { - blk = 0; - used = 0; - firstsize = fsize; + blk_ = 0; + used_ = 0; + firstsize_ = fsize; } void IncAlloc::freeall() { - blk = 0; - used = 0; + blk_ = 0; + used_ = 0; clearfrag(); } void IncAlloc::clear() { - blk = 0; - used = 0; - while (!blocks.empty()) { - delete[] (char *)(blocks.back().mem); - blocks.pop_back(); + blk_ = 0; + used_ = 0; + while (!blocks_.empty()) { + delete[](char *)(blocks_.back().mem); + blocks_.pop_back(); } clearfrag(); } void IncAlloc::clearfrag() { - for (uint i = 0; i <= MAXFRAGSIZE; i++) frags[i].clear(); + for (uint i = 0; i <= 
MAX_FRAG_SIZE_; i++) frags_[i].clear(); } void *IncAlloc::_alloc_search(uint size) { // find a block with enough space - while ((blk < blocks.size()) && (blocks[blk].size < used + size)) { - blk++; - used = 0; + while ((blk_ < blocks_.size()) && (blocks_[blk_].size < used_ + size)) { + blk_++; + used_ = 0; } - if (blk < blocks.size()) { - void *mem = (char *)blocks[blk].mem + used; - used += size; + if (blk_ < blocks_.size()) { + void *mem = (char *)blocks_[blk_].mem + used_; + used_ += size; return mem; } // allocate a new block - DEBUG_ASSERT(blk == blocks.size()); - uint bsize = firstsize; - if (blk > 0) { - bsize = (uint)(blocks[blk - 1].size * GROWSIZE + ROUNDUP) / ROUNDUP * ROUNDUP; - DEBUG_ASSERT(bsize > blocks[blk - 1].size); + DEBUG_ASSERT(blk_ == blocks_.size()); + uint bsize = firstsize_; + if (blk_ > 0) { + bsize = (uint)(blocks_[blk_ - 1].size * GROW_SIZE_ + ROUNDUP_) / ROUNDUP_ * ROUNDUP_; + DEBUG_ASSERT(bsize > blocks_[blk_ - 1].size); if (bsize < size) bsize = size; } void *mem = new char[bsize]; if (!mem) throw CprsErr::CPRS_ERR_MEM; - blocks.push_back(Block(mem, bsize)); - used = size; + blocks_.push_back(Block(mem, bsize)); + used_ = size; return mem; } @@ -80,10 +80,10 @@ void IncAlloc::GetMemUsg(uint &memblock, uint &memalloc, uint &memused) { memalloc = 0; uint memfree = 0; uint i; - for (i = 0; i < blocks.size(); i++) memblock += blocks[i].size; - for (i = 0; i < blk; i++) memalloc += blocks[i].size; - memalloc += used; - for (i = 1; i <= MAXFRAGSIZE; i++) memfree += uint(i * frags[i].size()); + for (i = 0; i < blocks_.size(); i++) memblock += blocks_[i].size; + for (i = 0; i < blk_; i++) memalloc += blocks_[i].size; + memalloc += used_; + for (i = 1; i <= MAX_FRAG_SIZE_; i++) memfree += uint(i * frags_[i].size()); memused = memalloc - memfree; } diff --git a/storage/tianmu/compress/inc_alloc.h b/storage/tianmu/compress/inc_alloc.h index 0f67d2970..2a525eb8f 100644 --- a/storage/tianmu/compress/inc_alloc.h +++ 
b/storage/tianmu/compress/inc_alloc.h @@ -30,9 +30,9 @@ namespace compress { // Incremental memory allocator. // May throw CprsErr::CPRS_ERR_MEM. class IncAlloc { - static constexpr int FIRSTSIZE = 16384; // default size of the first block - static constexpr int ROUNDUP = 4096; // block size will be a multiple of this - static constexpr double GROWSIZE = 1.2; // how big is the next block compared to the previous one + static constexpr int FIRST_SIZE_ = 16384; // default size of the first block + static constexpr int ROUNDUP_ = 4096; // block size will be a multiple of this + static constexpr double GROW_SIZE_ = 1.2; // how big is the next block compared to the previous one struct Block { void *mem; @@ -41,25 +41,25 @@ class IncAlloc { Block(void *m, uint s) : mem(m), size(s) {} }; - std::vector blocks; - uint blk; // index of the current block == no. of blocks already filled up - uint used; // no. of bytes in block 'blk' already used + std::vector blocks_; + uint blk_; // index of the current block == no. of blocks_ already filled up + uint used_; // no. of bytes in block 'blk_' already used_ - uint firstsize; // size of the first block to allocate + uint firstsize_; // size of the first block to allocate - // frags[s] - list of pointers to free'd fragments of size 's'; + // frags_[s] - list of pointers to free'd fragments of size 's'; // free'd fragments can be reused by alloc() - static const uint MAXFRAGSIZE = 6144; + static const uint MAX_FRAG_SIZE_ = 6144; // static const uint MAXNUMFRAG = 65536 * 2; // max. no. of fragments of a - // given size - never used as fragments are created on demand and we have no + // given size - never used_ as fragments are created on demand and we have no // control on their number - std::vector frags[MAXFRAGSIZE + 1]; + std::vector frags_[MAX_FRAG_SIZE_ + 1]; - // currently, 'size' must be at most MAXFRAGSIZE + // currently, 'size' must be at most MAX_FRAG_SIZE_ void *_alloc(uint size) { // find a fragment... 
- DEBUG_ASSERT(size && (size <= MAXFRAGSIZE)); - auto &frag = frags[size]; + DEBUG_ASSERT(size && (size <= MAX_FRAG_SIZE_)); + auto &frag = frags_[size]; if (frag.size()) { void *mem = frag.back(); frag.pop_back(); @@ -67,9 +67,9 @@ class IncAlloc { } // ...or take a new one from the current block - if (blk < blocks.size() && (blocks[blk].size >= used + size)) { - void *mem = (char *)blocks[blk].mem + used; - used += size; + if (blk_ < blocks_.size() && (blocks_[blk_].size >= used_ + size)) { + void *mem = (char *)blocks_[blk_].mem + used_; + used_ += size; return mem; } @@ -77,10 +77,10 @@ class IncAlloc { return _alloc_search(size); } void *_alloc_search(uint size); // more complicated part of _alloc, executed rarely - // put 'p' into frags[size]; does NOT check if p points into a block! + // put 'p' into frags_[size]; does NOT check if p points into a block! void _free(void *p, uint size) { - DEBUG_ASSERT(size && (size <= MAXFRAGSIZE)); - frags[size].push_back(p); + DEBUG_ASSERT(size && (size <= MAX_FRAG_SIZE_)); + frags_[size].push_back(p); } public: @@ -93,16 +93,16 @@ class IncAlloc { _free(p, sizeof(T) * n); } - void freeall(); // mark all blocks as free, without deallocation; clear lists + void freeall(); // mark all blocks_ as free, without deallocation; clear lists // of fragments - void clear(); // physically deallocate all blocks; clear lists of fragments + void clear(); // physically deallocate all blocks_; clear lists of fragments void clearfrag(); // clear lists of fragments void GetMemUsg(uint &memblock, uint &memalloc, uint &memused); void PrintMemUsg(FILE *f); void PrintMemUsg(std::ostream &str); - IncAlloc(uint fsize = FIRSTSIZE); + IncAlloc(uint fsize = FIRST_SIZE_); ~IncAlloc() { clear(); } }; diff --git a/storage/tianmu/compress/inc_wgraph.cpp b/storage/tianmu/compress/inc_wgraph.cpp index 9af22361f..1eb4fc892 100644 --- a/storage/tianmu/compress/inc_wgraph.cpp +++ b/storage/tianmu/compress/inc_wgraph.cpp @@ -27,47 +27,47 @@ namespace compress 
{ const uint IncWGraph::MatchLenCoder::c2[] = {120, 128}; const uint IncWGraph::MatchLenCoder::c3[] = {102, 119, 128}; -IncWGraph::IncWGraph() : memory() { - dump = NULL; - ROOT = NIL = START = NULL; - mask = NULL; - coder = NULL; - reclen = NULL; - records = NULL; +IncWGraph::IncWGraph() : memory_() { + dump_ = NULL; + ROOT_ = NIL_ = START_ = NULL; + p_mask_ = NULL; + coder_ = NULL; + reclen_ = NULL; + records_ = NULL; } void IncWGraph::Init() { - nfinals = 0; - matchlen_cost = esc_cost = 0; - // mask = &_mask_; // uncomment if masking should be used - mask = NULL; - - // create ROOT and NIL nodes (and START) - memory.alloc(ROOT); - memory.alloc(NIL); - memory.alloc(START); - - ROOT->edge = 0; - ROOT->nedge = 0; - ROOT->endpos = 0; - ROOT->suf = NIL; - ROOT->total = ROOT->EscCount(); - - *START = *ROOT; - - memory.alloc(NIL->edge, 256); - NIL->nedge = 0; // means 256 - NIL->endpos = 0; - NIL->suf = 0; - NIL->total = 256; - - // create edges from NIL to ROOT, for every possible symbol as a label - for (uint s = 0; s < 256; s++) NIL->edge[s].Init((uchar)s, 1, true, ROOT, 1); + nfinals_ = 0; + matchlen_cost_ = esc_cost_ = 0; + // p_mask_ = &_mask_; // uncomment if masking should be used + p_mask_ = NULL; + + // create ROOT_ and NIL_ nodes (and START_) + memory_.alloc(ROOT_); + memory_.alloc(NIL_); + memory_.alloc(START_); + + ROOT_->edge = 0; + ROOT_->nedge = 0; + ROOT_->endpos = 0; + ROOT_->suf = NIL_; + ROOT_->total = ROOT_->EscCount(); + + *START_ = *ROOT_; + + memory_.alloc(NIL_->edge, 256); + NIL_->nedge = 0; // means 256 + NIL_->endpos = 0; + NIL_->suf = 0; + NIL_->total = 256; + + // create edges from NIL_ to ROOT_, for every possible symbol as a label + for (uint s = 0; s < 256; s++) NIL_->edge[s].Init((uchar)s, 1, true, ROOT_, 1); } void IncWGraph::Clear() { - memory.freeall(); - recent.clear(); - ROOT = NIL = START = 0; + memory_.freeall(); + recent_.clear(); + ROOT_ = NIL_ = START_ = 0; } 
//------------------------------------------------------------------------------------------- @@ -76,9 +76,9 @@ void IncWGraph::Encode(RangeCoder *cod, char **index, const uint *lens, int nrec if ((nrec < 1) || (nrec > 65536) || (!cod)) throw CprsErr::CPRS_ERR_PAR; Clear(); Init(); - coder = cod; - records = (uchar **)index; - reclen = lens; + coder_ = cod; + records_ = (uchar **)index; + reclen_ = lens; packlen = 0; bool repeated; @@ -92,21 +92,21 @@ void IncWGraph::Encode(RangeCoder *cod, char **index, const uint *lens, int nrec if (/*repetitions &&*/ rec && (lens[rec] == lens[rec - 1])) { // is the previous record repeated? - if (std::memcmp(records[rec], records[rec - 1], lens[rec]) == 0) - coder->Encode(0, rep++, total); + if (std::memcmp(records_[rec], records_[rec - 1], lens[rec]) == 0) + coder_->Encode(0, rep++, total); else { - coder->Encode(rep, total - rep, total); + coder_->Encode(rep, total - rep, total); EncodeRec((ushort)rec, repeated); } - if (++total > coder->MAX_TOTAL) { + if (++total > coder_->MAX_TOTAL_) { rep >>= 1; total >>= 1; } // are the repetitions indeed statistically significant? - // if(((total & 63) == 0) && (total > 1000) && (rep*(nfinals+2) < - // 3*total)) repetitions = false; 1/nfinals - prob. of repetition when - // records are random, independent, uniform distribution (lower + // if(((total & 63) == 0) && (total > 1000) && (rep*(nfinals_+2) < + // 3*total)) repetitions = false; 1/nfinals_ - prob. 
of repetition when + // records_ are random, independent, uniform distribution (lower // approximation) } else EncodeRec((ushort)rec, repeated); @@ -120,9 +120,9 @@ void IncWGraph::Decode(RangeCoder *cod, char **index, const uint *lens, int nrec if ((nrec < 1) || (nrec > 65536) || (!cod)) throw CprsErr::CPRS_ERR_PAR; Clear(); Init(); - coder = cod; - records = (uchar **)index; - reclen = lens; + coder_ = cod; + records_ = (uchar **)index; + reclen_ = lens; bool repeated; // bool repetitions = true; // start: assume optimistically that @@ -130,27 +130,27 @@ void IncWGraph::Decode(RangeCoder *cod, char **index, const uint *lens, int nrec uint rep = 1, total = 2, cnt; // start: rep=1, nonrep=1 uint sum = 0; for (int rec = 0; rec < nrec; rec++) { - records[rec] = (uchar *)dest + sum; + records_[rec] = (uchar *)dest + sum; if (lens[rec] == 0) continue; repeated = true; if (/*repetitions &&*/ rec && (lens[rec] == lens[rec - 1])) { // is the previous record repeated? - if ((cnt = coder->GetCount(total)) < rep) { - coder->Decode(0, rep++, total); - records[rec] = records[rec - 1]; - // std::memcpy(records[rec], records[rec-1], lens[rec]); + if ((cnt = coder_->GetCount(total)) < rep) { + coder_->Decode(0, rep++, total); + records_[rec] = records_[rec - 1]; + // std::memcpy(records_[rec], records_[rec-1], lens[rec]); } else { - coder->Decode(rep, total - rep, total); + coder_->Decode(rep, total - rep, total); DecodeRec((ushort)rec, dlen - sum, repeated); } - if (++total > coder->MAX_TOTAL) { + if (++total > coder_->MAX_TOTAL_) { rep >>= 1; total >>= 1; } // are the repetitions indeed statistically significant? 
- // if(((total & 63) == 0) && (total > 1000) && (rep*(nfinals+2) < + // if(((total & 63) == 0) && (total > 1000) && (rep*(nfinals_+2) < // 3*total)) repetitions = false; } else DecodeRec((ushort)rec, dlen - sum, repeated); @@ -160,16 +160,17 @@ void IncWGraph::Decode(RangeCoder *cod, char **index, const uint *lens, int nrec } void IncWGraph::EncodeRec(ushort rec, bool &repeated) { - ASSERT(reclen[rec] < 65536, "bad length for word graph"); + ASSERT(reclen_[rec] < 65536, "bad length for word graph"); ushort proj = 0, edgelen, maxlen; - ushort restlen = reclen[rec]; - uchar *s = records[rec]; - Node *base = START, *final = 0; + ushort restlen = reclen_[rec]; + uchar *s = records_[rec]; + Node *base = START_, *final = 0; Edge *edge; Count low, total; bool solid; - if (mask) mask->Reset(); + if (p_mask_) + p_mask_->Reset(); // loop over consecutive substrings to encode: // (1) escape to the first node with transition starting with symbol *s, @@ -181,8 +182,8 @@ void IncWGraph::EncodeRec(ushort rec, bool &repeated) { // (1) while (1) { - total = base->GetMaskTotal(mask); - if (base->FindEdge(*s, edge, low, mask)) + total = base->GetMaskTotal(p_mask_); + if (base->FindEdge(*s, edge, low, p_mask_)) break; else { repeated = false; @@ -192,19 +193,19 @@ void IncWGraph::EncodeRec(ushort rec, bool &repeated) { } else solid = false; #ifdef MAKESTAT - uint pos = coder->GetPos(); + uint pos = coder_->GetPos(); #endif - base->EncodeEsc(coder, low, total); + base->EncodeEsc(coder_, low, total); #ifdef MAKESTAT - esc_cost += coder->GetPos() - pos; + esc_cost_ += coder_->GetPos() - pos; #endif - base->AddEdge(*s, restlen, solid, final, memory); + base->AddEdge(*s, restlen, solid, final, memory_); base = base->suf; } } // (2) - coder->Encode(low, edge->count, total); // encode the edge choice + coder_->Encode(low, edge->count, total); // encode the edge choice // find and encode the no. of matching symbols (incl. 
the first one) edgelen = edge->GetLen(); @@ -214,11 +215,11 @@ void IncWGraph::EncodeRec(ushort rec, bool &repeated) { restlen -= proj; if ((proj < edge->GetLen()) && restlen) repeated = false; #ifdef MAKESTAT - uint pos = coder->GetPos(); + uint pos = coder_->GetPos(); #endif - MatchLenCoder::Encode(coder, proj, maxlen, dump); + MatchLenCoder::Encode(coder_, proj, maxlen, dump_); #ifdef MAKESTAT - matchlen_cost += coder->GetPos() - pos; + matchlen_cost_ += coder_->GetPos() - pos; #endif // (3) @@ -229,25 +230,26 @@ void IncWGraph::EncodeRec(ushort rec, bool &repeated) { void IncWGraph::DecodeRec(ushort rec, uint dlen, bool &repeated) { ushort proj = 0, declen, edgelen, maxlen; - ASSERT(reclen[rec] < 65536, "bad length for word graph"); - ushort len = reclen[rec]; + ASSERT(reclen_[rec] < 65536, "bad length for word graph"); + ushort len = reclen_[rec]; ushort restlen = len; - uchar *s = records[rec]; + uchar *s = records_[rec]; uchar fsym = 0; - Node *base = START, *final = 0; + Node *base = START_, *final = 0; Edge *edge; Count low, count, total; bool solid; - if (mask) mask->Reset(); + if (p_mask_) + p_mask_->Reset(); while (restlen) { DEBUG_ASSERT(proj == 0); // (1) while (1) { - total = base->GetMaskTotal(mask); - count = (Count)coder->GetCount(total); - if (base->FindEdge(count, edge, low, mask)) + total = base->GetMaskTotal(p_mask_); + count = (Count)coder_->GetCount(total); + if (base->FindEdge(count, edge, low, p_mask_)) break; else { if (repeated) { @@ -262,63 +264,65 @@ void IncWGraph::DecodeRec(ushort rec, uint dlen, bool &repeated) { solid = true; } else solid = false; - base->DecodeEsc(coder, low, total); - recent.push_back(base->AddEdge(0, restlen, solid, final, memory)); + base->DecodeEsc(coder_, low, total); + recent_.push_back(base->AddEdge(0, restlen, solid, final, memory_)); base = base->suf; } } // (2) - coder->Decode(low, edge->count, total); // decode the edge choice + coder_->Decode(low, edge->count, total); // decode the edge choice // 
decode the no. of matching symbols (incl. the first one) edgelen = edge->GetLen(); maxlen = (edgelen < restlen ? edgelen : restlen); // min(edgelen,restlen) - MatchLenCoder::Decode(coder, proj, maxlen); + MatchLenCoder::Decode(coder_, proj, maxlen); // copy decoded symbols to 's' restlen -= proj; if (!repeated) { - *s++ = fsym = edge->fsym; // needed when base=NIL + *s++ = fsym = edge->fsym; // needed when base=NIL_ LabelCopy(s, edge->target->endpos - edgelen + 1, proj - 1); } else if (restlen == 0) { s = edge->target->endpos - edgelen + proj; - records[rec] = s - len; // set correct index for repeated record + records_[rec] = s - len; // set correct index for repeated record } else if (proj < edge->GetLen()) { // it appears that the record is not a // repetition - DEBUG_ASSERT(base != NIL); + DEBUG_ASSERT(base != NIL_); if (len > dlen) throw CprsErr::CPRS_ERR_BUF; declen = len - restlen; LabelCopy(s, edge->target->endpos - edgelen + proj - declen, declen); // copy at once all labels passed till now repeated = false; - DEBUG_ASSERT(recent.empty()); + DEBUG_ASSERT(recent_.empty()); } // set 'fsym' of recently created edges - for (std::vector::size_type i = recent.size(); i;) recent[--i]->fsym = fsym; - recent.clear(); + for (std::vector::size_type i = recent_.size(); i;) recent_[--i]->fsym = fsym; + recent_.clear(); // (3) Traverse(base, edge, proj, s, restlen, final, false); } if (final && !final->suf) final->suf = base; - DEBUG_ASSERT(recent.empty()); + DEBUG_ASSERT(recent_.empty()); } //------------------------------------------------------------------------------------------- inline void IncWGraph::Traverse(Node *&base, Edge *&edge, ushort &proj, uchar *s, ushort restlen, Node *&final, bool encode) { - if (mask) mask->Reset(); + if (p_mask_) + p_mask_->Reset(); if (proj == edge->GetLen()) { // we reached a node // update count, perhaps exchange edges to keep them sorted, make scaling if // necessary - if (base != NIL) base->UpdateCount(edge); + if (base != NIL_) 
+ base->UpdateCount(edge); // node duplication if (!edge->IsSolid()) - base = edge->target->Duplicate(base, edge, memory); + base = edge->target->Duplicate(base, edge, memory_); else base = edge->target; // canonize proj = 0; @@ -339,7 +343,7 @@ inline void IncWGraph::Traverse(Node *&base, Edge *&edge, ushort &proj, uchar *s if (encode) node->AddEdge(*s, restlen, solid, final, node->EscCount()); else - recent.push_back(node->AddEdge(0, restlen, solid, final, node->EscCount())); + recent_.push_back(node->AddEdge(0, restlen, solid, final, node->EscCount())); solid = false; } else if (final && !final->suf) { final->suf = node; @@ -358,7 +362,8 @@ inline void IncWGraph::Traverse(Node *&base, Edge *&edge, ushort &proj, uchar *s base = Node::Canonize(base->suf, edge, proj, s, true); } last->suf = base; - if (mask) mask->Add(last->edge[0].fsym); // assumption: edge[0] is the edge that was split + if (p_mask_) + p_mask_->Add(last->edge[0].fsym); // assumption: edge[0] is the edge that was split } } @@ -448,8 +453,8 @@ void IncWGraph::Node::Rescale(uchar shift) { IncWGraph::Node *IncWGraph::InsertNode([[maybe_unused]] Node *base, Edge *edge, ushort proj) { Node *node; - memory.alloc(node); - memory.alloc(node->edge, node->RoundNEdge(node->nedge = 1)); + memory_.alloc(node); + memory_.alloc(node->edge, node->RoundNEdge(node->nedge = 1)); DEBUG_ASSERT(node->RoundNEdge(1) == node->RoundNEdge(2)); // edge from 'node' to 'edge->target' @@ -660,12 +665,12 @@ inline void IncWGraph::MatchLenCoder::Decode(RangeCoder *coder, ushort &proj, us inline IncWGraph::Node *IncWGraph::NewFinal(uchar *endpos) { Node *f; - memory.alloc(f); + memory_.alloc(f); f->endpos = endpos; f->edge = 0; f->suf = 0; f->total = f->EscCount(); - nfinals++; + nfinals_++; return f; } @@ -680,8 +685,9 @@ inline void IncWGraph::LabelCopy(uchar *&dest, const uchar *src, ushort len) { void IncWGraph::Print(std::ostream &str, uint flags, Node *n) { if (!n) { - if (START && (START != ROOT)) Print(str, flags, START); 
- Print(str, flags, ROOT); + if (START_ && (START_ != ROOT_)) + Print(str, flags, START_); + Print(str, flags, ROOT_); return; } str << "- " << n << ": "; diff --git a/storage/tianmu/compress/inc_wgraph.h b/storage/tianmu/compress/inc_wgraph.h index 731799c60..8171fbd69 100644 --- a/storage/tianmu/compress/inc_wgraph.h +++ b/storage/tianmu/compress/inc_wgraph.h @@ -33,7 +33,7 @@ class IncWGraph { struct Edge; class Mask; struct Node { - static const uint MAX_TOTAL = RangeCoder::MAX_TOTAL; + static const uint MAX_TOTAL = RangeCoder::MAX_TOTAL_; static const Count init_count = 1; // initial value of a count static const Count updt_count = 1; // how much a count is updated in a single step // static const uchar max_count_last = 2; @@ -46,7 +46,7 @@ class IncWGraph { Count total; // total count of outgoing edges, incl. ESC uchar nedge; // size of 'edge'; 0 means 256 - bool IsNIL() { return suf == 0; } // risky method for detecting NIL node + bool IsNIL() { return suf == 0; } // risky method for detecting NIL_ node ushort GetNEdge() { return edge ? (nedge ? 
nedge : 256) : 0; } static ushort RoundNEdge(ushort n); bool FindEdge(uchar s, Edge *&e, Count &low, Mask *mask); @@ -106,21 +106,21 @@ class IncWGraph { }; class Mask { - std::bitset<255> map; - uint nset; + std::bitset<255> map_; + uint n_set_; public: - bool Masked(uchar s) { return map.test(s); } + bool Masked(uchar s) { return map_.test(s); } void Add(uchar s) { DEBUG_ASSERT(!Masked(s)); - map.set(s); - nset++; + map_.set(s); + n_set_++; } void Reset() { - map.reset(); - nset = 0; - } //{ std::memset(b_map,0,sizeof(map)); } - uint NumSet() { return nset; } + map_.reset(); + n_set_ = 0; + } //{ std::memset(b_map,0,sizeof(map_)); } + uint NumSet() { return n_set_; } // Mask() { Reset(); } }; @@ -133,17 +133,17 @@ class IncWGraph { static void Decode(RangeCoder *coder, ushort &proj, ushort edgelen); }; - IncAlloc memory; // all elements of the graph are allocated with IncAlloc - Node *ROOT, *NIL, *START; - std::vector recent; // edges with 'fsym' fields waiting for - // initialization (during decoding) - Mask _mask_, *mask; + IncAlloc memory_; // all elements of the graph are allocated with IncAlloc + Node *ROOT_, *NIL_, *START_; + std::vector recent_; // edges with 'fsym' fields waiting for + // initialization (during decoding) + Mask _mask_, *p_mask_; - RangeCoder *coder; - uchar **records; - const uint *reclen; + RangeCoder *coder_; + uchar **records_; + const uint *reclen_; - uint nfinals; + uint nfinals_; Node *InsertNode(Node *base, Edge *edge, ushort proj); // insert new node, in the middle of the 'edge' @@ -155,7 +155,7 @@ class IncWGraph { void DecodeRec(ushort rec, uint dlen, bool &repeated); void Init(); - void Clear(); // clear graph structure withOUT memory deallocation + void Clear(); // clear graph structure withOUT memory_ deallocation public: IncWGraph(); @@ -165,18 +165,18 @@ class IncWGraph { // Upon exit, 'index' will contain indices into 'dest'. 
// 'dlen' - size of 'dest'; must be >= 'packlen' returned from Encode during - // compression (which is <= total length of records). + // compression (which is <= total length of records_). void Decode(RangeCoder *cod, char **index, const uint *lens, int nrec, char *dest, uint dlen); void Print(std::ostream &str = std::cout, uint flags = 1, Node *n = 0); void PrintLbl(std::ostream &str, Edge *e); // for gathering statistics - FILE *dump; - uint matchlen_cost, esc_cost; // length of code produced in MatchLen::Encode - // and Node::EncodeEsc - void PrintMemUsg(FILE *f) { memory.PrintMemUsg(f); } - void PrintMemUsg(std::ostream &str) { memory.PrintMemUsg(str); } + FILE *dump_; + uint matchlen_cost_, esc_cost_; // length of code produced in MatchLen::Encode + // and Node::EncodeEsc + void PrintMemUsg(FILE *f) { memory_.PrintMemUsg(f); } + void PrintMemUsg(std::ostream &str) { memory_.PrintMemUsg(str); } }; } // namespace compress diff --git a/storage/tianmu/compress/num_compressor.h b/storage/tianmu/compress/num_compressor.h index 53beb59c5..40581a808 100644 --- a/storage/tianmu/compress/num_compressor.h +++ b/storage/tianmu/compress/num_compressor.h @@ -40,8 +40,8 @@ namespace compress { class NumCompressorBase { public: virtual ~NumCompressorBase() {} - static const uint NFILTERS = 7; - FILE *dump; + static const uint N_FILTERS_ = 7; + FILE *dump_; // statistics struct Stat { @@ -55,7 +55,7 @@ class NumCompressorBase { } }; struct Stats : public std::vector { - Stats() : std::vector(NumCompressorBase::NFILTERS) {} + Stats() : std::vector(NumCompressorBase::N_FILTERS_) {} // Stats(size_type cnt): std::vector(cnt) {} Stats &operator+=(Stats &s) { ASSERT(size() == s.size(), "should be 'size() == s.size()'"); @@ -70,7 +70,7 @@ class NumCompressorBase { for (size_type i = 0; i < size(); i++) (*this)[i].Clear(); } }; - Stats stats; + Stats stats_; // non-templated type-safe compression methods virtual CprsErr Compress([[maybe_unused]] char *dest, [[maybe_unused]] uint 
&len, [[maybe_unused]] const uchar *src, @@ -142,7 +142,7 @@ class NumCompressorBase { template class NumCompressor : public NumCompressorBase { private: - bool copy_only; + bool copy_only_; // compress by simple copying the data CprsErr CopyCompress(char *dest, uint &len, const T *src, uint nrec); @@ -152,7 +152,7 @@ class NumCompressor : public NumCompressorBase { public: // Filters - compression algorithms: - std::vector>> filters; + std::vector>> filters_; NumCompressor(bool copy_only = false); virtual ~NumCompressor(); @@ -215,21 +215,21 @@ inline CprsErr NumDecompress(T *dest, char *src, uint len, uint nrec, uint64_t m //------------------------------------------------------------------------- template -NumCompressor::NumCompressor(bool copy_only) : copy_only(copy_only) { - dump = NULL; - - // Create filters - filters.reserve(NFILTERS); - // filters.emplace_back(new DataFilt_RLE); - filters.emplace_back(new DataFilt_Min); - filters.emplace_back(new DataFilt_GCD); - filters.emplace_back(new DataFilt_Diff); - filters.emplace_back(new PartDict); - filters.emplace_back(new TopBitDict(true)); // top bits - filters.emplace_back(new TopBitDict(false)); // low bits - filters.emplace_back(new DataFilt_Uniform); - ASSERT(filters.size() == NFILTERS, "should be 'filters.size() == NFILTERS'"); - IFSTAT(stats.resize(filters.size())); +NumCompressor::NumCompressor(bool copy_only) : copy_only_(copy_only) { + dump_ = NULL; + + // Create filters_ + filters_.reserve(N_FILTERS_); + // filters_.emplace_back(new DataFilt_RLE); + filters_.emplace_back(new DataFilt_Min); + filters_.emplace_back(new DataFilt_GCD); + filters_.emplace_back(new DataFilt_Diff); + filters_.emplace_back(new PartDict); + filters_.emplace_back(new TopBitDict(true)); // top bits + filters_.emplace_back(new TopBitDict(false)); // low bits + filters_.emplace_back(new DataFilt_Uniform); + ASSERT(filters_.size() == N_FILTERS_, "should be 'filters_.size() == N_FILTERS_'"); + IFSTAT(stats_.resize(filters_.size())); 
} template @@ -239,11 +239,13 @@ NumCompressor::~NumCompressor() {} template void NumCompressor::DumpData(DataSet *ds, uint f) { - if (!dump) return; + if (!dump_) + return; uint nbit = core::GetBitLen(ds->maxval); - std::fprintf(dump, "%u: %u %I64u %u ; ", f, ds->nrec, (uint64_t)ds->maxval, nbit); - if (f) filters[f - 1]->LogCompress(dump); - std::fprintf(dump, "\n"); + std::fprintf(dump_, "%u: %u %I64u %u ; ", f, ds->nrec, (uint64_t)ds->maxval, nbit); + if (f) + filters_[f - 1]->LogCompress(dump_); + std::fprintf(dump_, "\n"); } //------------------------------------------------------------------------- @@ -275,12 +277,13 @@ CprsErr NumCompressor::CompressT(char *dest, uint &len, const T *src, uint nr if (!dest || !src || (len < 3)) return CprsErr::CPRS_ERR_BUF; if ((nrec == 0) || (maxval == 0)) return CprsErr::CPRS_ERR_PAR; - if (copy_only) return CopyCompress(dest, len, src, nrec); + if (copy_only_) + return CopyCompress(dest, len, src, nrec); *dest = 1; // version uint posID = 1, pos = 3; // 1 byte reserved for compression ID ushort ID = 0; - ASSERT(filters.size() <= 8 * sizeof(ID), "should be 'filters.size() <= 8*sizeof(ID)'"); + ASSERT(filters_.size() <= 8 * sizeof(ID), "should be 'filters_.size() <= 8*sizeof(ID)'"); std::vector buf(src, src + nrec); DataSet dataset = {&buf[0], maxval, nrec}; @@ -289,15 +292,16 @@ CprsErr NumCompressor::CompressT(char *dest, uint &len, const T *src, uint nr try { RangeCoder coder; coder.InitCompress(dest, len, pos); - IFSTAT(stats.Clear()); + IFSTAT(stats_.Clear()); - // main loop: apply all compression algorithms from 'filters' + // main loop: apply all compression algorithms from 'filters_' uint f = 0; IFSTAT(DumpData(&dataset, f)); - for (; (f < filters.size()) && dataset.nrec; f++) { + for (; (f < filters_.size()) && dataset.nrec; f++) { IFSTAT(clock_t t1 = clock()); - if (filters[f]->Encode(&coder, &dataset)) ID |= 1 << f; - IFSTAT(stats[f].tc += clock() - t1); + if (filters_[f]->Encode(&coder, &dataset)) + ID |= 1 << 
f; + IFSTAT(stats_[f].tc += clock() - t1); IFSTAT(if (ID & (1 << f)) DumpData(&dataset, f + 1)); } @@ -334,21 +338,23 @@ CprsErr NumCompressor::DecompressT(T *dest, char *src, uint len, uint nrec, T try { RangeCoder coder; coder.InitDecompress(src, len, pos); - uint f = 0, nfilt = (uint)filters.size(); - IFSTAT(stats.Clear()); + uint f = 0, nfilt = (uint)filters_.size(); + IFSTAT(stats_.Clear()); // 1st stage of decompression for (; (f < nfilt) && dataset.nrec; f++) { IFSTAT(clock_t t1 = clock()); - if (ID & (1 << f)) filters[f]->Decode(&coder, &dataset); - IFSTAT(stats[f].td += clock() - t1); + if (ID & (1 << f)) + filters_[f]->Decode(&coder, &dataset); + IFSTAT(stats_[f].td += clock() - t1); } // 2nd stage for (; f > 0;) { IFSTAT(clock_t t1 = clock()); - if (ID & (1 << --f)) filters[f]->Merge(&dataset); - IFSTAT(stats[f].td += clock() - t1); + if (ID & (1 << --f)) + filters_[f]->Merge(&dataset); + IFSTAT(stats_[f].td += clock() - t1); } } catch (CprsErr &err) { return err; diff --git a/storage/tianmu/compress/part_dict.h b/storage/tianmu/compress/part_dict.h index 1e17af9db..b5620d902 100644 --- a/storage/tianmu/compress/part_dict.h +++ b/storage/tianmu/compress/part_dict.h @@ -53,7 +53,7 @@ template class PartDict : public DataFilt { public: static const uint MAXLEN = DICMAP_MAX; - static const uint MAXTOTAL = RangeCoder::MAX_TOTAL; + static const uint MAXTOTAL = RangeCoder::MAX_TOTAL_; static const uint MINOCCUR = 4; // how many times a value must occur to be frequent value static const uint MAXFREQ = (MAXTOTAL + 20) / MINOCCUR; // max no. 
of frequent values = max diff --git a/storage/tianmu/compress/ppm.cpp b/storage/tianmu/compress/ppm.cpp index 437399928..58c3cea8a 100644 --- a/storage/tianmu/compress/ppm.cpp +++ b/storage/tianmu/compress/ppm.cpp @@ -26,34 +26,35 @@ namespace Tianmu { namespace compress { -FILE *PPM::dump = NULL; -bool PPM::printstat = false; +FILE *PPM::dump_ = NULL; +bool PPM::printstat_ = false; PPM::PPM(const Symb *data, int dlen, ModelType mt, PPMParam param, uchar method) { if ((data == NULL) || (dlen <= 0) || (mt == ModelType::ModelNull)) return; switch (mt) { case ModelType::ModelSufTree: - model.reset(new SuffixTree<>(data, dlen)); + model_.reset(new SuffixTree<>(data, dlen)); break; case ModelType::ModelWordGraph: - model.reset(new WordGraph(data, dlen, method == 2)); + model_.reset(new WordGraph(data, dlen, method == 2)); break; default: TIANMU_ERROR("not implemented"); } - model->TransformForPPM(param); + model_->TransformForPPM(param); static int _i_ = 0; - if (printstat && dump && (++_i_ == 18)) model->PrintStat(dump); + if (printstat_ && dump_ && (++_i_ == 18)) + model_->PrintStat(dump_); } CprsErr PPM::CompressArith(char *dest, int &dlen, Symb *src, int slen) { // Data format: // [1B] [?] - // null PPM model - if (!model) { + // null PPM model_ + if (!model_) { if (dlen < slen + 1) return CprsErr::CPRS_ERR_BUF; dest[0] = 0; // method: no compression std::memcpy(dest + 1, src, slen); @@ -62,11 +63,12 @@ CprsErr PPM::CompressArith(char *dest, int &dlen, Symb *src, int slen) { } WordGraph *wg = NULL; try { - wg = dynamic_cast(model.get()); + wg = dynamic_cast(model_.get()); } catch (...) 
{ wg = NULL; } - if (wg) ASSERT(wg->insatend == false, "should be 'wg->insatend == false'"); + if (wg) + ASSERT(wg->insatend_ == false, "should be 'wg->insatend_ == false'"); // DEBUG_ASSERT(src[slen-1] == 0); ArithCoder coder; @@ -79,31 +81,31 @@ CprsErr PPM::CompressArith(char *dest, int &dlen, Symb *src, int slen) { bs_dest.PutByte(1); // compression method [1 byte], currently 1; 0 means no // compression (data are copied) - // SufTree::State stt = model->InitState(); + // SufTree::State stt = model_->InitState(); // SufTree::Edge edge; Range rng; Count total; - model->InitPPM(); - model->logfile = dump; + model_->InitPPM(); + model_->log_file_ = dump_; coder.InitCompress(); // int esc = 0; int len; for (int i = 0; i < slen;) { - // if(dump) std::fprintf(dump, "%d %d %d\n", _n_++, i, stt); + // if(dump_) std::fprintf(dump_, "%d %d %d\n", _n_++, i, stt); // if(_n_ >= 14860)//314712) // i = i; len = slen - i; - model->Move(src + i, len, rng, total); + model_->Move(src + i, len, rng, total); i += len; - // model->FindEdgeS(stt, edge, src + i, slen - i); - // model->GetRange(stt, edge, rng); - // total = model->GetTotal(stt); + // model_->FindEdgeS(stt, edge, src + i, slen - i); + // model_->GetRange(stt, edge, rng); + // total = model_->GetTotal(stt); // DEBUG_ASSERT(rng.high <= total); - // len = model->GetLen(stt, edge); + // len = model_->GetLen(stt, edge); // encode 'rng' coder.ScaleRange(&bs_dest, rng.low, rng.high, total); @@ -112,28 +114,28 @@ CprsErr PPM::CompressArith(char *dest, int &dlen, Symb *src, int slen) { // //if(edge.n == 0) esc += bs_dest.GetPos() - // pos1; // count bits used to // encode ESC symbols - // if(makedump && dump) { - // std::fprintf(dump, "%d\t", - // model->GetDep(stt)); if(edge.n == 0) - // std::fprintf(dump, "<-"); else { + // if(makedump && dump_) { + // std::fprintf(dump_, "%d\t", + // model_->GetDep(stt)); if(edge.n == 0) + // std::fprintf(dump_, "<-"); else { // for(int j //= i; j < i+len; j++) { // char s = (char)src[j]; // 
if((s == 0) || (s == 10) //|| (s == 13) || (s == '\t')) s = //'#'; - // fputc(s, dump); + // fputc(s, dump_); // } // } - // std::fprintf(dump, "\t%d\n", + // std::fprintf(dump_, "\t%d\n", // bs_dest.GetPos() - pos1); // } // # endif // // i += len; - // model->Move(stt, edge); + // model_->Move(stt, edge); } - // if(dump) std::fprintf(dump, "Bytes used to encode ESC symbols: %d\n", + // if(dump_) std::fprintf(dump_, "Bytes used to encode ESC symbols: %d\n", // (esc+7)/8); coder.EndCompress(&bs_dest); @@ -169,42 +171,43 @@ CprsErr PPM::DecompressArith(Symb *dest, int dlen, char *src, int slen) { WordGraph *wg = NULL; try { - wg = dynamic_cast(model.get()); + wg = dynamic_cast(model_.get()); } catch (...) { wg = NULL; } - if (wg) ASSERT(wg->insatend == false, "should be 'wg->insatend == false'"); + if (wg) + ASSERT(wg->insatend_ == false, "should be 'wg->insatend_ == false'"); ArithCoder coder; BitStream bs_src(src, slen * 8, 8); // 1 byte already read try { - // SufTree::State stt = model->InitState(); + // SufTree::State stt = model_->InitState(); // SufTree::Edge edge; Range rng; Count c, total; int len; CprsErr err; - model->InitPPM(); + model_->InitPPM(); coder.InitDecompress(&bs_src); // int _n_ = 0; for (int i = 0; i < dlen;) { - // if(dump) std::fprintf(dump, "%d %d %d\n", _n_++, i, stt); + // if(dump_) std::fprintf(dump_, "%d %d %d\n", _n_++, i, stt); // find the next edge to move - total = model->GetTotal(); + total = model_->GetTotal(); c = coder.GetCount(total); len = dlen - i; - err = model->Move(c, dest + i, len, rng); + err = model_->Move(c, dest + i, len, rng); if (static_cast(err)) return err; i += len; - // model->FindEdgeC(stt, edge, c); + // model_->FindEdgeC(stt, edge, c); // remove the decoded data from the source - // model->GetRange(stt, edge, rng); + // model_->GetRange(stt, edge, rng); // DEBUG_ASSERT(rng.high <= total); coder.RemoveSymbol(&bs_src, rng.low, rng.high, total); @@ -213,12 +216,12 @@ CprsErr PPM::DecompressArith(Symb *dest, int 
dlen, char *src, int slen) { // i = i; // len = dlen - i; - // err = model->GetLabel(stt, edge, dest + i, len); + // err = model_->GetLabel(stt, edge, dest + i, len); // if(static_cast(err)) // return err; // i += len; - // model->Move(stt, edge); + // model_->Move(stt, edge); } } catch (ErrBufOverrun &) { return CprsErr::CPRS_ERR_BUF; @@ -230,8 +233,8 @@ CprsErr PPM::DecompressArith(Symb *dest, int dlen, char *src, int slen) { CprsErr PPM::Compress(char *dest, int &dlen, Symb *src, int slen) { // return CompressArith(dest, dlen, src, slen); - // null PPM model - if (!model) { + // null PPM model_ + if (!model_) { if (dlen < slen + 1) return CprsErr::CPRS_ERR_BUF; dest[0] = 0; // method: no compression std::memcpy(dest + 1, src, slen); @@ -239,7 +242,7 @@ CprsErr PPM::Compress(char *dest, int &dlen, Symb *src, int slen) { return CprsErr::CPRS_SUCCESS; } // try { - // WordGraph* wg = dynamic_cast(model); + // WordGraph* wg = dynamic_cast(model_); // if(wg) wg->insatend = true; //} catch(...){} @@ -248,12 +251,13 @@ CprsErr PPM::Compress(char *dest, int &dlen, Symb *src, int slen) { WordGraph *wg = NULL; try { - wg = dynamic_cast(model.get()); + wg = dynamic_cast(model_.get()); } catch (...) 
{ wg = NULL; } - if (wg) ASSERT(wg->insatend, "'wg->insatend' should be true"); + if (wg) + ASSERT(wg->insatend_, "'wg->insatend_' should be true"); RangeCoder coder; coder.InitCompress(dest + 1, dlen - 1); @@ -265,13 +269,13 @@ CprsErr PPM::Compress(char *dest, int &dlen, Symb *src, int slen) { Range rng; Count total; - model->InitPPM(); - model->logfile = dump; + model_->InitPPM(); + model_->log_file_ = dump_; int len; for (int i = 0; i < slen;) { len = slen - i; - model->Move(src + i, len, rng, total); + model_->Move(src + i, len, rng, total); i += len; coder.Encode(rng.low, rng.high - rng.low, total); } @@ -312,12 +316,13 @@ CprsErr PPM::Decompress(Symb *dest, int dlen, char *src, int slen) { WordGraph *wg = NULL; try { - wg = dynamic_cast(model.get()); + wg = dynamic_cast(model_.get()); } catch (...) { wg = NULL; } - if (wg) ASSERT(wg->insatend, "'wg->insatend' should be true"); + if (wg) + ASSERT(wg->insatend_, "'wg->insatend_' should be true"); RangeCoder coder; coder.InitDecompress(src + 1, slen - 1); @@ -327,15 +332,15 @@ CprsErr PPM::Decompress(Symb *dest, int dlen, char *src, int slen) { int len; CprsErr err; - model->InitPPM(); + model_->InitPPM(); for (int i = 0; i < dlen;) { // find the next edge to move - total = model->GetTotal(); + total = model_->GetTotal(); c = coder.GetCount(total); len = dlen - i; - err = model->Move(c, dest + i, len, rng); + err = model_->Move(c, dest + i, len, rng); if (static_cast(err)) return err; i += len; @@ -349,7 +354,9 @@ CprsErr PPM::Decompress(Symb *dest, int dlen, char *src, int slen) { return CprsErr::CPRS_SUCCESS; } -void PPM::PrintInfo(std::ostream &str) { str << "No. of all nodes in the model: " << model->GetNNodes() << std::endl; } +void PPM::PrintInfo(std::ostream &str) { + str << "No. 
of all nodes in the model_: " << model_->GetNNodes() << std::endl; +} } // namespace compress } // namespace Tianmu diff --git a/storage/tianmu/compress/ppm.h b/storage/tianmu/compress/ppm.h index 276399809..09bc7ff03 100644 --- a/storage/tianmu/compress/ppm.h +++ b/storage/tianmu/compress/ppm.h @@ -28,7 +28,7 @@ namespace compress { class PPM { using Symb = uchar; - std::unique_ptr model; + std::unique_ptr model_; // compression and decompression using ArithCoder CprsErr CompressArith(char *dest, int &dlen, Symb *src, int slen); @@ -38,10 +38,10 @@ class PPM { enum class ModelType { ModelNull, ModelSufTree, ModelWordGraph }; // enum CoderType { CoderArith, CoderRange }; - static FILE *dump; - static bool printstat; + static FILE *dump_; + static bool printstat_; - // if data=NULL or dlen=0, the model will be null - compression will simply + // if data=NULL or dlen=0, the model_ will be null - compression will simply // copy the data; 'method' - which version of compression will be used in // Compress/Decompress PPM(const Symb *data, int dlen, ModelType mt, PPMParam param = PPMParam(), uchar method = 2); diff --git a/storage/tianmu/compress/ppm_defs.h b/storage/tianmu/compress/ppm_defs.h index 57127af4b..1becd0703 100644 --- a/storage/tianmu/compress/ppm_defs.h +++ b/storage/tianmu/compress/ppm_defs.h @@ -58,7 +58,7 @@ struct PPMParam { class PPMModel { protected: using Symb = uchar; - static const int NSymb = 256; + static const int N_Symb_ = 256; public: virtual void TransformForPPM(PPMParam param_ = PPMParam()) = 0; @@ -79,9 +79,9 @@ class PPMModel { virtual int GetMemAlloc() = 0; // total number of bytes used virtual void PrintStat([[maybe_unused]] FILE *f) {} // file for making logs during compression or decompression - FILE *logfile; + FILE *log_file_; - PPMModel() : logfile(NULL) {} + PPMModel() : log_file_(NULL) {} virtual ~PPMModel() {} }; diff --git a/storage/tianmu/compress/range_code.h b/storage/tianmu/compress/range_code.h index c101b1a4a..9effedc84 
100644 --- a/storage/tianmu/compress/range_code.h +++ b/storage/tianmu/compress/range_code.h @@ -30,105 +30,107 @@ namespace compress { /* Decoder always reads exactly the same no. of bytes as the encoder saves */ class RangeCoder { - static const uint TOP = 1u << 24; - static const uint BOT = 1u << 16; - // static const uint BOT = 1u << 15; + static const uint TOP_ = 1u << 24; + static const uint BOT_ = 1u << 16; + // static const uint BOT_ = 1u << 15; - uint low, code, range, _tot, _sh; - uchar *buf, *pos, *stop; + uint low_, code_, range_, _tot_, _sh_; + uchar *buf_, *pos_, *stop_; // constants for uniform encoding: - static const uint uni_nbit = 16; - static const uint uni_mask = (1u << uni_nbit) - 1; - static const uint uni_total = 1u << uni_nbit; + static const uint uni_nbit_ = 16; + static const uint uni_mask_ = (1u << uni_nbit_) - 1; + static const uint uni_total_ = 1u << uni_nbit_; uchar InByte() { - if (pos >= stop) throw CprsErr::CPRS_ERR_BUF; - return *pos++; + if (pos_ >= stop_) + throw CprsErr::CPRS_ERR_BUF; + return *pos_++; } void OutByte(uchar c) { - if (pos >= stop) throw CprsErr::CPRS_ERR_BUF; - *pos++ = c; + if (pos_ >= stop_) + throw CprsErr::CPRS_ERR_BUF; + *pos_++ = c; } public: - static const uint MAX_TOTAL = BOT; + static const uint MAX_TOTAL_ = BOT_; - RangeCoder() { buf = pos = stop = 0; } - uint GetPos() { return (uint)(pos - buf); } // !!! position in BYTES !!! + RangeCoder() { buf_ = pos_ = stop_ = 0; } + uint GetPos() { return (uint)(pos_ - buf_); } // !!! position in BYTES !!! 
void InitCompress(void *b, uint len, uint p = 0) { - buf = (uchar *)b; - pos = buf + p; - stop = buf + len; - low = 0; - range = (uint)-1; + buf_ = (uchar *)b; + pos_ = buf_ + p; + stop_ = buf_ + len; + low_ = 0; + range_ = (uint)-1; } void EndCompress() { - for (int i = 0; i < 4; i++) OutByte(low >> 24), low <<= 8; + for (int i = 0; i < 4; i++) OutByte(low_ >> 24), low_ <<= 8; } void InitDecompress(void *b, uint len, uint p = 0) { - buf = (uchar *)b; - pos = buf + p; - stop = buf + len; - low = code = 0; - range = (uint)-1; - for (int i = 0; i < 4; i++) code = code << 8 | InByte(); + buf_ = (uchar *)b; + pos_ = buf_ + p; + stop_ = buf_ + len; + low_ = code_ = 0; + range_ = (uint)-1; + for (int i = 0; i < 4; i++) code_ = code_ << 8 | InByte(); } void Encode(uint cumFreq, uint freq, uint total) { - DEBUG_ASSERT(freq && cumFreq + freq <= total && total <= MAX_TOTAL); - DEBUG_ASSERT(range >= BOT && low + range - 1 >= low); - low += (range /= total) * cumFreq; - range *= freq; - while (((low ^ (low + range)) < TOP) || ((range < BOT) && ((range = -low & (BOT - 1)), 1))) - OutByte(low >> 24), low <<= 8, range <<= 8; + DEBUG_ASSERT(freq && cumFreq + freq <= total && total <= MAX_TOTAL_); + DEBUG_ASSERT(range_ >= BOT_ && low_ + range_ - 1 >= low_); + low_ += (range_ /= total) * cumFreq; + range_ *= freq; + while (((low_ ^ (low_ + range_)) < TOP_) || ((range_ < BOT_) && ((range_ = -low_ & (BOT_ - 1)), 1))) + OutByte(low_ >> 24), low_ <<= 8, range_ <<= 8; } uint GetCount(uint total) { #if defined(_DEBUG) || !defined(NDEBUG) - _tot = total; + _tot_ = total; #endif - DEBUG_ASSERT(range >= BOT && low + range - 1 >= code && code >= low); - uint tmp = (code - low) / (range /= total); + DEBUG_ASSERT(range_ >= BOT_ && low_ + range_ - 1 >= code_ && code_ >= low_); + uint tmp = (code_ - low_) / (range_ /= total); DEBUG_ASSERT(tmp < total); if (tmp >= total) throw CprsErr::CPRS_ERR_COR; return tmp; } void Decode(uint cumFreq, uint freq, [[maybe_unused]] uint total) { - 
DEBUG_ASSERT(_tot == total && freq && cumFreq + freq <= total && total <= MAX_TOTAL); - low += range * cumFreq; - range *= freq; - while (((low ^ (low + range)) < TOP) || ((range < BOT) && ((range = -low & (BOT - 1)), 1))) - code = code << 8 | InByte(), low <<= 8, range <<= 8; - // NOTE: after range= BOT && low + range - 1 >= low); - low += (range _SHR_ASSIGN_ shift)*cumFreq; - range *= freq; - while ((low ^ (low + range)) < TOP || (range < BOT && ((range = -low & (BOT - 1)), 1))) - OutByte(low >> 24), low <<= 8, range <<= 8; + DEBUG_ASSERT(cumFreq + freq <= (1u _SHL_ shift) && freq && (1u _SHL_ shift) <= MAX_TOTAL_); + DEBUG_ASSERT(range_ >= BOT_ && low_ + range_ - 1 >= low_); + low_ += (range_ _SHR_ASSIGN_ shift)*cumFreq; + range_ *= freq; + while ((low_ ^ (low_ + range_)) < TOP_ || (range_ < BOT_ && ((range_ = -low_ & (BOT_ - 1)), 1))) + OutByte(low_ >> 24), low_ <<= 8, range_ <<= 8; } uint GetCountShift(uint shift) { #if defined(_DEBUG) || !defined(NDEBUG) - _sh = shift; + _sh_ = shift; #endif - DEBUG_ASSERT(range >= BOT && low + range - 1 >= code && code >= low); - uint tmp = (code - low) / (range _SHR_ASSIGN_ shift); + DEBUG_ASSERT(range_ >= BOT_ && low_ + range_ - 1 >= code_ && code_ >= low_); + uint tmp = (code_ - low_) / (range_ _SHR_ASSIGN_ shift); if (tmp >= (1u << shift)) throw CprsErr::CPRS_ERR_COR; return tmp; } void DecodeShift(uint cumFreq, uint freq, [[maybe_unused]] uint shift) { - DEBUG_ASSERT(_sh == shift && cumFreq + freq <= (1u _SHL_ shift) && freq && (1u _SHL_ shift) <= MAX_TOTAL); - low += range * cumFreq; - range *= freq; - while (((low ^ (low + range)) < TOP) || ((range < BOT) && ((range = -low & (BOT - 1)), 1))) - code = code << 8 | InByte(), low <<= 8, range <<= 8; + DEBUG_ASSERT(_sh_ == shift && cumFreq + freq <= (1u _SHL_ shift) && freq && (1u _SHL_ shift) <= MAX_TOTAL_); + low_ += range_ * cumFreq; + range_ *= freq; + while (((low_ ^ (low_ + range_)) < TOP_) || ((range_ < BOT_) && ((range_ = -low_ & (BOT_ - 1)), 1))) + code_ = code_ 
<< 8 | InByte(), low_ <<= 8, range_ <<= 8; } // uniform compression and decompression (must be: val <= maxval) @@ -138,18 +140,18 @@ class RangeCoder { DEBUG_ASSERT((((uint64_t)maxval >> bitmax) == 0) || bitmax >= 64); if (maxval == 0) return; - // encode groups of 'uni_nbit' bits, from the least significant - DEBUG_ASSERT(uni_total <= MAX_TOTAL); - while (bitmax > uni_nbit) { - EncodeShift((uint)(val & uni_mask), 1, uni_nbit); - DEBUG_ASSERT(uni_nbit < sizeof(T) * 8); - val >>= uni_nbit; - maxval >>= uni_nbit; - bitmax -= uni_nbit; + // encode groups of 'uni_nbit_' bits, from the least significant + DEBUG_ASSERT(uni_total_ <= MAX_TOTAL_); + while (bitmax > uni_nbit_) { + EncodeShift((uint)(val & uni_mask_), 1, uni_nbit_); + DEBUG_ASSERT(uni_nbit_ < sizeof(T) * 8); + val >>= uni_nbit_; + maxval >>= uni_nbit_; + bitmax -= uni_nbit_; } // encode the most significant group - // ASSERT(maxval < MAX_TOTAL, "should be 'maxval < MAX_TOTAL'"); // compiler + // ASSERT(maxval < MAX_TOTAL_, "should be 'maxval < MAX_TOTAL_'"); // compiler // figure out as allways true Encode((uint)val, 1, (uint)maxval + 1); } @@ -165,21 +167,21 @@ class RangeCoder { if (maxval == 0) return; DEBUG_ASSERT((((uint64_t)maxval >> bitmax) == 0) || bitmax >= 64); - // decode groups of 'uni_nbit' bits, from the least significant - DEBUG_ASSERT(uni_total <= MAX_TOTAL); + // decode groups of 'uni_nbit_' bits, from the least significant + DEBUG_ASSERT(uni_total_ <= MAX_TOTAL_); uint v, shift = 0; - while (shift + uni_nbit < bitmax) { - v = GetCountShift(uni_nbit); - DecodeShift(v, 1, uni_nbit); + while (shift + uni_nbit_ < bitmax) { + v = GetCountShift(uni_nbit_); + DecodeShift(v, 1, uni_nbit_); DEBUG_ASSERT(shift < 64); val |= (uint64_t)v << shift; - shift += uni_nbit; + shift += uni_nbit_; } // decode the most significant group DEBUG_ASSERT(shift < sizeof(maxval) * 8); uint total = (uint)(maxval >> shift) + 1; - DEBUG_ASSERT(total <= MAX_TOTAL); + DEBUG_ASSERT(total <= MAX_TOTAL_); v = GetCount(total); 
Decode(v, 1, total); val |= (uint64_t)v << shift; @@ -194,38 +196,38 @@ class RangeCoder { // uniform compression by shifting template void EncodeUniShift(T x, uint shift) { - if (shift <= uni_nbit) + if (shift <= uni_nbit_) EncodeShift((uint)x, 1, shift); else { - EncodeShift((uint)x & uni_mask, 1, uni_nbit); - EncodeUniShift(x >> uni_nbit, shift - uni_nbit); + EncodeShift((uint)x & uni_mask_, 1, uni_nbit_); + EncodeUniShift(x >> uni_nbit_, shift - uni_nbit_); } } template void DecodeUniShift(T &x, uint shift) { - if (shift <= uni_nbit) { + if (shift <= uni_nbit_) { x = (T)GetCountShift(shift); DecodeShift((uint)x, 1, shift); } else { - uint tmp = GetCountShift(uni_nbit); - DecodeShift(tmp, 1, uni_nbit); - DecodeUniShift(x, shift - uni_nbit); - DEBUG_ASSERT(uni_nbit < sizeof(x) * 8); - x = (x << uni_nbit) | (T)tmp; + uint tmp = GetCountShift(uni_nbit_); + DecodeShift(tmp, 1, uni_nbit_); + DecodeUniShift(x, shift - uni_nbit_); + DEBUG_ASSERT(uni_nbit_ < sizeof(x) * 8); + x = (x << uni_nbit_) | (T)tmp; } } }; template <> inline void RangeCoder::EncodeUniShift(uchar x, uint shift) { - DEBUG_ASSERT(shift <= uni_nbit); + DEBUG_ASSERT(shift <= uni_nbit_); EncodeShift((uint)x, 1, shift); } template <> inline void RangeCoder::EncodeUniShift(ushort x, uint shift) { - DEBUG_ASSERT(shift <= uni_nbit); + DEBUG_ASSERT(shift <= uni_nbit_); EncodeShift((uint)x, 1, shift); } diff --git a/storage/tianmu/compress/suffix_tree.cpp b/storage/tianmu/compress/suffix_tree.cpp index 7c63b9f1e..d246e38dd 100644 --- a/storage/tianmu/compress/suffix_tree.cpp +++ b/storage/tianmu/compress/suffix_tree.cpp @@ -25,57 +25,57 @@ namespace compress { template void SuffixTree::Init() { - dlen = -1; - NIL = 0; - ROOT = 1; - nleaves = 0; - nodes.reserve(VECT_INIT); - nodes.resize(1); + dlen_ = -1; + NIL_ = 0; + ROOT_ = 1; + nleaves_ = 0; + nodes_.reserve(VECT_INIT); + nodes_.resize(1); #ifdef SUFTREE_STAT - nodes[NIL].dep = -1; + nodes_[NIL_].dep = -1; #endif - iscreating = false; + iscreating_ = 
false; } template void SuffixTree::Create() { - iscreating = true; + iscreating_ = true; - lens.reset(new int[dlen]); - for (int z = 0, i = 0; z < dlen; z++) - if (data[z] == '\0') - for (; i <= z; i++) lens[i] = z + 1 - i; + lens_.reset(new int[dlen_]); + for (int z = 0, i = 0; z < dlen_; z++) + if (data_[z] == '\0') + for (; i <= z; i++) lens_[i] = z + 1 - i; // make root node - nodes.resize(2); - Node &root = GetNode(ROOT); + nodes_.resize(2); + Node &root = GetNode(ROOT_); root = Node(); root.len = 1; - root.prev = ROOT; + root.prev = ROOT_; // active point - Point active = {ROOT, 0, 0}; + Point active = {ROOT_, 0, 0}; - const uchar *s = data.get(); - PNode last_int = NIL, - last_leaf = NIL; // index of the recently created internal and leaf node; - // stored for initialization of their 'suf' links + const uchar *s = data_.get(); + PNode last_int = NIL_, + last_leaf = NIL_; // index of the recently created internal and leaf node; + // stored for initialization of their 'suf' links // add leaf representing string "\0" AddLeaf(active, 0, last_int, last_leaf); - GetNode(last_leaf).suf = ROOT; + GetNode(last_leaf).suf = ROOT_; GetNode(last_leaf).count = 0; - last_leaf = NIL; + last_leaf = NIL_; int nsuf = 1; - while (nsuf < dlen) { + while (nsuf < dlen_) { // only if active point is at node, it's worth invoking MoveDown() if (active.proj == 0) { // after following a suffix link, we reached a node, so we already know // how to initialize the suf link of the recently created node - if (last_int != NIL) { + if (last_int != NIL_) { GetNode(last_int).suf = active.n; - last_int = NIL; + last_int = NIL_; } // maybe we can move further down the tree @@ -86,9 +86,9 @@ void SuffixTree::Create() { // to the root, and update counts of the passed leaves. Finally, we skip // to the next string. 
if (IsLeaf(active.n)) { - if (last_leaf != NIL) { + if (last_leaf != NIL_) { GetNode(last_leaf).suf = active.n; - last_leaf = NIL; + last_leaf = NIL_; } do { DEBUG_ASSERT(*(s - 1) == '\0'); @@ -96,21 +96,21 @@ void SuffixTree::Create() { DEBUG_ASSERT(active.proj == 0); GetNode(active.n).count++; MoveSuffix(active, s); - } while (active.n != ROOT); // TODO: time - optimize the leaf counting + } while (active.n != ROOT_); // TODO: time - optimize the leaf counting // GetNode(active.n).count++; - nsuf = (int)(s - data.get()); + nsuf = (int)(s - data_.get()); continue; } } - AddLeaf(active, (int)(s - data.get()), last_int, last_leaf); + AddLeaf(active, (int)(s - data_.get()), last_int, last_leaf); MoveSuffix(active, s); nsuf++; } - iscreating = false; + iscreating_ = false; } template @@ -123,11 +123,12 @@ void SuffixTree::MoveDown(Point &p, const uchar *&s) { for (;;) { // read the first symbol of the next edge p.next = GetChild(p.n, *s); - if (p.next == NIL) return; + if (p.next == NIL_) + return; proj = 1; s++; - // if((*s == '\0') && (p.n != NIL)) { + // if((*s == '\0') && (p.n != NIL_)) { // s++; // p.n = p.next; // p.proj = 0; @@ -137,7 +138,7 @@ void SuffixTree::MoveDown(Point &p, const uchar *&s) { // s++; // read all next symbols of the edge - lbl = data.get() + GetNode(p.next).pos; // text of the edge label + lbl = data_.get() + GetNode(p.next).pos; // text of the edge label len = GetNode(p.next).len; while ((proj < len) && (*s == lbl[proj])) { proj++; @@ -153,21 +154,22 @@ void SuffixTree::MoveDown(Point &p, const uchar *&s) { template void SuffixTree::AddLeaf(Point p, int pos, PNode &last_int, PNode &last_leaf) { // allocate a new leaf - PNode nleaf = (int)nodes.size(); - nodes.resize(nleaf + 1); + PNode nleaf = (int)nodes_.size(); + nodes_.resize(nleaf + 1); Node &leaf = GetNode(nleaf); - leaf.child = NIL; + leaf.child = NIL_; leaf.pos = pos; - leaf.len = lens[pos]; + leaf.len = lens_[pos]; leaf.count = 1; - uchar s = data[pos]; + uchar s = data_[pos]; // 
add leaf below an existing node... if (p.proj == 0) { AddChild(p.n, s, nleaf); - if (last_int != NIL) GetNode(last_int).suf = p.n; - last_int = NIL; + if (last_int != NIL_) + GetNode(last_int).suf = p.n; + last_int = NIL_; #ifdef SUFTREE_STAT GetNode(nleaf).dep = GetNode(p.n).dep + GetNode(nleaf).len; @@ -176,8 +178,8 @@ void SuffixTree::AddLeaf(Point p, int pos, PNode &last_int, PNode & // ...or split the edge, add new node and a leaf else { - int nint = (int)nodes.size(); // index of new node - nodes.resize(nint + 1); // TODO: memory, time - allocate 'nodes' on the basis of prediction + int nint = (int)nodes_.size(); // index of new node + nodes_.resize(nint + 1); // TODO: memory, time - allocate 'nodes_' on the basis of prediction Node &n = GetNode(nint); Node &prev = GetNode(p.n); @@ -188,13 +190,14 @@ void SuffixTree::AddLeaf(Point p, int pos, PNode &last_int, PNode & next.pos += n.len; next.len -= n.len; - ChgChild(prev, data[n.pos], + ChgChild(prev, data_[n.pos], nint); // replace child 'next' of 'prev' with 'n' - AddChild(n, data[next.pos], p.next); + AddChild(n, data_[next.pos], p.next); AddChild(n, s, nleaf); // now we can initialize the 'suf' link of the previously created node - if (last_int != NIL) GetNode(last_int).suf = nint; + if (last_int != NIL_) + GetNode(last_int).suf = nint; last_int = nint; #ifdef SUFTREE_STAT @@ -203,11 +206,12 @@ void SuffixTree::AddLeaf(Point p, int pos, PNode &last_int, PNode & #endif } - if (last_leaf != NIL) GetNode(last_leaf).suf = nleaf; + if (last_leaf != NIL_) + GetNode(last_leaf).suf = nleaf; last_leaf = nleaf; // # ifdef SUFTREE_STAT - nleaves++; + nleaves_++; // # endif } @@ -223,7 +227,7 @@ void SuffixTree::Canonize(Point &p, const uchar *s) { int len; while (p.proj > 0) { ch = GetChild(p.n, *(s - p.proj)); - DEBUG_ASSERT(ch != NIL); + DEBUG_ASSERT(ch != NIL_); len = GetNode(ch).len; if (p.proj < (uint)len) { p.next = ch; @@ -236,9 +240,9 @@ void SuffixTree::Canonize(Point &p, const uchar *s) { template void 
SuffixTree::Clear() { - nodes.clear(); - lens.reset(); - data.reset(); + nodes_.clear(); + lens_.reset(); + data_.reset(); } template @@ -250,11 +254,11 @@ int SuffixTree::GetSizeCDAWG(PNode n, bool cut) { ASSERT(nd.count == GetNode(nd.suf).count, "should be 'nd.count == GetNode(nd.suf).count'"); // if(nd.count != GetNode(nd.suf).count) { cout << "ASSERT FAILED\n"; return // 0; } - } else if ((n != ROOT) && (nd.count == GetNode(nd.suf).count)) + } else if ((n != ROOT_) && (nd.count == GetNode(nd.suf).count)) cut = true; PNode ch = nd.child; - while (ch != NIL) { + while (ch != NIL_) { ret += GetSizeCDAWG(ch, cut); ch = GetNode(ch).next; } @@ -265,15 +269,17 @@ int SuffixTree::GetSizeCDAWG(PNode n, bool cut) { template int SuffixTree::GetMemUsage() { - size_t x = nodes.size() * sizeof(Node); - if (data) x += dlen * sizeof(data[0]); + size_t x = nodes_.size() * sizeof(Node); + if (data_) + x += dlen_ * sizeof(data_[0]); return (int)x; } template int SuffixTree::GetMemAlloc() { - size_t x = nodes.capacity() * sizeof(Node); - if (data) x += dlen * sizeof(data[0]); + size_t x = nodes_.capacity() * sizeof(Node); + if (data_) + x += dlen_ * sizeof(data_[0]); return (int)x; } @@ -285,23 +291,23 @@ void SuffixTree::PrintInd(std::ostream &str, int ind) { template void SuffixTree::PrintSub(std::ostream &str, int start, int stop) { if (stop < 0) - str << (char *)data.get() + start << '#'; + str << (char *)data_.get() + start << '#'; else { char c; - for (; start < stop; start++) str << ((c = (char)data[start]) ? c : '#'); + for (; start < stop; start++) str << ((c = (char)data_[start]) ? c : '#'); } } template void SuffixTree::Print(std::ostream &str, uint flags, PNode n, PNode prev, int ind) { - if (n == NIL) { - n = ROOT; - prev = NIL; + if (n == NIL_) { + n = ROOT_; + prev = NIL_; } Node &node = GetNode(n); // if(IsInternal(n)) { - int len = (n == ROOT) ? 0 : node.len; + int len = (n == ROOT_) ? 
0 : node.len; PrintInd(str, ind); if (flags & 1) { @@ -317,7 +323,8 @@ void SuffixTree::Print(std::ostream &str, uint flags, PNode n, PNod ind += len; for (int i = 1; i <= NSymb; i++) { PNode ch = GetChild(n, i % NSymb); - if (ch != NIL) Print(str, flags, ch, n, ind); + if (ch != NIL_) + Print(str, flags, ch, n, ind); } (void)prev; // FIXME //} @@ -330,9 +337,9 @@ void SuffixTree::Print(std::ostream &str, uint flags, PNode n, PNod // str << ' '; // } // if(flags & 2) - // str << (dlen - pos) << ' '; + // str << (dlen_ - pos) << ' '; // if(flags & 4) - // str << dlen - node.dep << ' '; + // str << dlen_ - node.dep << ' '; // if(flags & 8) // str << node.count << ' '; // str << " (" << node.dep << ")" << endl; @@ -343,21 +350,21 @@ void SuffixTree::Print(std::ostream &str, uint flags, PNode n, PNod template void SuffixTree::TransformForPPM(PPMParam param_) { - param = param_; + this->param_ = param_; - // compute counts of all nodes in the 'tree' + // compute counts of all nodes_ in the 'tree' SetCounts(); #ifdef SUFTREE_CDAWG_SIZE - cout << "No. of nodes: " << GetNNodes() << endl; + cout << "No. of nodes_: " << GetNNodes() << endl; cout << "No. 
of edges to remove if the tree were turned into CDAWG: " << GetSizeCDAWG() << endl; #endif // cut off unnecessary branches - // if(param.valid_count > 1.0) - // Prune(ROOT, false); + // if(param_.valid_count > 1.0) + // Prune(ROOT_, false); - // set cumulative counts of nodes - field 'sum' + // set cumulative counts of nodes_ - field 'sum' // TODO: time - merge SetSums() and SetSuffix() SetSums(); @@ -369,7 +376,7 @@ template void SuffixTree::SetCounts() { std::vector stack; stack.reserve(STACK_INIT); - stack.push_back(StackElem(ROOT, false)); + stack.push_back(StackElem(ROOT_, false)); PNode n, ch; bool proc; @@ -379,17 +386,18 @@ void SuffixTree::SetCounts() { stack.pop_back(); Node &node = GetNode(n); ch = node.child; - if (ch == NIL) continue; + if (ch == NIL_) + continue; if (proc) { // children are already processed (their counts are correct) int &count = node.count; - while (ch != NIL) { + while (ch != NIL_) { count += GetNode(ch).count; ch = NxtChild(ch); } } else { // insert children onto the stack stack.push_back(StackElem(n, true)); - while (ch != NIL) { + while (ch != NIL_) { stack.push_back(StackElem(ch, false)); ch = NxtChild(ch); } @@ -415,7 +423,8 @@ template void SuffixTree::Prune(PNode n, bool cut) { SetCut(n, cut); PNode ch = GetNode(n).child; - if (ch == NIL) return; + if (ch == NIL_) + return; bool invalid = false; if (!cut) { @@ -428,35 +437,36 @@ void SuffixTree::Prune(PNode n, bool cut) { do { Prune(ch, cut); ch = NxtChild(ch); - } while (ch != NIL); + } while (ch != NIL_); - if (invalid) GetNode(n).child = NIL; // physically cut off children (but without destroying them) + if (invalid) + GetNode(n).child = NIL_; // physically cut off children (but without destroying them) } template bool SuffixTree::IsValid(PNode n) { - // TODO: more sophisticated validity ckecking of nodes + // TODO: more sophisticated validity ckecking of nodes_ // count children int num = 0; PNode ch = GetNode(n).child; - DEBUG_ASSERT(ch != NIL); + DEBUG_ASSERT(ch != 
NIL_); do { num++; ch = NxtChild(ch); - } while (ch != NIL); + } while (ch != NIL_); - return (GetNode(n).count >= param.valid_count * num); + return (GetNode(n).count >= param_.valid_count * num); } template void SuffixTree::SetSums() { - GetNode(ROOT).sum = 0; // root's count shouldn't be needed + GetNode(ROOT_).sum = 0; // root's count shouldn't be needed std::vector stack; stack.reserve(STACK_INIT); - stack.push_back(ROOT); + stack.push_back(ROOT_); - // int _xxx_ = 0; // no. of suf links where both nodes have the same count + // int _xxx_ = 0; // no. of suf links where both nodes_ have the same count PNode n, ch; int shift, cnt, scnt; while (!stack.empty()) { @@ -465,7 +475,8 @@ void SuffixTree::SetSums() { Node &node = GetNode(n); // if(node.count == GetNode(node.suf).count) _xxx_++; ch = node.child; - if (ch == NIL) continue; + if (ch == NIL_) + continue; shift = Shift(node.count); // how to shift children's counts Count sum = GetEscCount(n); @@ -483,7 +494,7 @@ void SuffixTree::SetSums() { stack.push_back(ch); ch = NxtChild(ch); - } while (ch != NIL); + } while (ch != NIL_); } // cout << "No. 
of unnecessary sufixes: " << _xxx_ << endl; } @@ -526,36 +537,37 @@ int SuffixTree::Shift(int c) { template void SuffixTree::SetSuffix() { - // recursive procedure SetSuffix(n) processed nodes in post-order; + // recursive procedure SetSuffix(n) processed nodes_ in post-order; // this one processes in pre-order (it is easier to implement iteratively) // TODO: ratio - more sophisticated choose of long suffix link std::vector stack; stack.reserve(STACK_INIT); - stack.push_back(ROOT); + stack.push_back(ROOT_); PNode n, ch; while (!stack.empty()) { n = stack.back(); stack.pop_back(); Node &node = GetNode(n); - DEBUG_ASSERT(n != NIL); + DEBUG_ASSERT(n != NIL_); // put children on stack - for (ch = node.child; ch != NIL; ch = NxtChild(ch)) stack.push_back(ch); + for (ch = node.child; ch != NIL_; ch = NxtChild(ch)) stack.push_back(ch); - if (n == ROOT) continue; + if (n == ROOT_) + continue; PNode &suf = node.suf; - if (data[node.pos + node.len - 1] == '\0') + if (data_[node.pos + node.len - 1] == '\0') // if the node's edge label ends with '\0', link the suffix directly to // the root - suf = ROOT; + suf = ROOT_; else // find suffix which is not cut off and has signifficantly different // distribution than 'n' // TODO: time - optimize searching for the best suffix link - while ((suf != NIL) && (GetCut(suf) || Similar(n, suf))) suf = GetNode(suf).suf; - // if(suf == NIL) suf = ROOT; + while ((suf != NIL_) && (GetCut(suf) || Similar(n, suf))) suf = GetNode(suf).suf; + // if(suf == NIL_) suf = ROOT_; } } // template @@ -588,47 +600,49 @@ void SuffixTree::SetSuffix() { template bool SuffixTree::Similar(PNode n, PNode suf) { int c1 = GetNode(n).count, c2 = GetNode(suf).count; - return (c2 < param.suf_ratio * c1); + return (c2 < param_.suf_ratio * c1); } //------------------------------------------------------------------------ template void SuffixTree::FindEdge(Edge &e, Symb *str, int len) { - if (state == NIL) { - e.n = ROOT; + if (state_ == NIL_) { + e.n = ROOT_; e.s = 
str[0]; return; } // find the edge beginning with symbol str[0] - e.n = GetChild(state, str[0]); - if (e.n == NIL) return; + e.n = GetChild(state_, str[0]); + if (e.n == NIL_) + return; // check if the rest of the edge label is the same as 'str' Node &child = GetNode(e.n); - if ((child.len > len) || (std::memcmp(str + 1, data.get() + child.pos + 1, child.len - 1) != 0)) e.n = NIL; + if ((child.len > len) || (std::memcmp(str + 1, data_.get() + child.pos + 1, child.len - 1) != 0)) + e.n = NIL_; } template void SuffixTree::FindEdge(Edge &e, Count c) { - if (state == NIL) { + if (state_ == NIL_) { DEBUG_ASSERT(c < NSymb); - e.n = ROOT; + e.n = ROOT_; e.s = (Symb)c; return; } - if (c < GetEscCount(state)) { - e.n = NIL; + if (c < GetEscCount(state_)) { + e.n = NIL_; return; } // find the edge with count 'c'; linear search // TODO: time - binary search given count; or sort children by descending // count - PNode ch = GetNode(state).child; + PNode ch = GetNode(state_).child; for (;;) { - DEBUG_ASSERT(ch != NIL); + DEBUG_ASSERT(ch != NIL_); if (c < GetNode(ch).sum) { e.n = ch; return; @@ -639,11 +653,11 @@ void SuffixTree::FindEdge(Edge &e, Count c) { template CprsErr SuffixTree::GetLabel(Edge e, Symb *lbl, int &len) { - if (e.n == NIL) { + if (e.n == NIL_) { len = 0; return CprsErr::CPRS_SUCCESS; } - if (e.n == ROOT) { + if (e.n == ROOT_) { if (len < 1) return CprsErr::CPRS_ERR_BUF; *lbl = e.s; len = 1; @@ -653,18 +667,18 @@ CprsErr SuffixTree::GetLabel(Edge e, Symb *lbl, int &len) { Node &n = GetNode(e.n); if (len < n.len) return CprsErr::CPRS_ERR_BUF; len = n.len; - std::memcpy(lbl, data.get() + n.pos, len); + std::memcpy(lbl, data_.get() + n.pos, len); return CprsErr::CPRS_SUCCESS; } template void SuffixTree::GetRange(PNode stt, Edge e, Range &r) { - if (e.n == ROOT) { + if (e.n == ROOT_) { r.low = e.s; r.high = e.s + 1; return; } - if (e.n == NIL) { + if (e.n == NIL_) { r.low = 0; r.high = GetEscCount(stt); return; @@ -681,22 +695,22 @@ void SuffixTree::GetRange(PNode 
stt, Edge e, Range &r) { template void SuffixTree::Move(Symb *str, int &len, Range &rng, Count &total) { - Edge e{NIL, 0}; + Edge e{NIL_, 0}; FindEdge(e, str, len); len = GetLen(e); - GetRange(state, e, rng); - total = GetTotal(state); + GetRange(state_, e, rng); + total = GetTotal(state_); DEBUG_ASSERT(rng.high <= total); Move(e); } template CprsErr SuffixTree::Move(Count c, Symb *str, int &len, Range &rng) { - Edge e{NIL, 0}; + Edge e{NIL_, 0}; FindEdge(e, c); CprsErr err = GetLabel(e, str, len); if (static_cast(err)) return err; - GetRange(state, e, rng); + GetRange(state_, e, rng); Move(e); return CprsErr::CPRS_SUCCESS; } diff --git a/storage/tianmu/compress/suffix_tree.h b/storage/tianmu/compress/suffix_tree.h index a6cf632e1..af5abcf4d 100644 --- a/storage/tianmu/compress/suffix_tree.h +++ b/storage/tianmu/compress/suffix_tree.h @@ -45,7 +45,7 @@ template class SuffixTree : public PPMModel { public: // node identifier (pointer); - // 0 - nil, 1 - root, positive - internal nodes, negative - leafs (-1 means + // 0 - nil, 1 - root, positive - internal nodes_, negative - leafs (-1 means // the first leaf, i.e. 
number 0) using PNode = int; @@ -66,8 +66,8 @@ class SuffixTree : public PPMModel { // transformation for PPM Symb fsym; // int-2, leaf-2 - PNode child; // int-1, leaf-NO // first child (NIL only for a leaf) - PNode next; // int-1, leaf-1 // sibling of this node (NIL for the last child) + PNode child; // int-1, leaf-NO // first child (NIL_ only for a leaf) + PNode next; // int-1, leaf-1 // sibling of this node (NIL_ for the last child) PNode prev; // int-2, leaf-2 // previous sibling; cyclic list (first // child points to the last) @@ -80,43 +80,45 @@ class SuffixTree : public PPMModel { Node() { std::memset(this, 0, sizeof *this); } }; - // all nodes - std::vector nodes; - PNode NIL, ROOT; + // all nodes_ + std::vector nodes_; + PNode NIL_, ROOT_; - int dlen; // length of 'data', INcluding the terminating symbol ('\0') - std::unique_ptr data; // original string - for reference - std::unique_ptr lens; // lens[i] - length of substring of 'data' from - // 'i' to the next '\0' (incl. '\0') + int dlen_; // length of 'data_', INcluding the terminating symbol ('\0') + std::unique_ptr data_; // original string - for reference + std::unique_ptr lens_; // lens_[i] - length of substring of 'data_' from + // 'i' to the next '\0' (incl. 
'\0') // global parameters and statistics - int nleaves; - bool iscreating; // = true during tree construction, false otherwise - PPMParam param; + int nleaves_; + bool iscreating_; // = true during tree construction, false otherwise + PPMParam param_; //------------------------------------------------------------------------- - Node &GetNode(PNode n) { return nodes[n]; } + Node &GetNode(PNode n) { return nodes_[n]; } PNode NxtChild(PNode ch) { return GetNode(ch).next; } PNode PrvChild(PNode ch) { return GetNode(ch).prev; } PNode GetChild(PNode n, Symb s) { #ifdef SUFTREE_STAT - if (iscreating) GetNode(n).nvisit++; + if (iscreating_) + GetNode(n).nvisit++; #endif - if (n == NIL) return ROOT; + if (n == NIL_) + return ROOT_; PNode ch = GetNode(n).child; - while (ch != NIL) + while (ch != NIL_) if (GetNode(ch).fsym == s) return ch; else ch = GetNode(ch).next; - return NIL; + return NIL_; } void AddChild(Node &n, Symb s, PNode m) { Node &mm = GetNode(m); mm.fsym = s; mm.next = n.child; - if (n.child == NIL) + if (n.child == NIL_) mm.prev = m; else { PNode &prev = GetNode(n.child).prev; @@ -139,7 +141,7 @@ class SuffixTree : public PPMModel { mm.prev = GetNode(*ch).prev; if (mm.prev == *ch) mm.prev = m; // 'mm' is the only child - else if (mm.next == NIL) + else if (mm.next == NIL_) GetNode(n.child).prev = m; // 'mm' is the last child out of 2 or more children else GetNode(mm.next).prev = m; // 'mm' is not the last child @@ -150,8 +152,8 @@ class SuffixTree : public PPMModel { // int GetLeafPos(PNode par, PNode leaf) { return GetNode(leaf).dep + // GetNode(par).dep; } - bool IsLeaf(PNode n) { return GetNode(n).child == NIL; } - bool IsInternal(PNode n) { return GetNode(n).child != NIL; } + bool IsLeaf(PNode n) { return GetNode(n).child == NIL_; } + bool IsInternal(PNode n) { return GetNode(n).child != NIL_; } struct Point { PNode n; uint proj; // number of symbols passed from 'n' down to 'next' (if Point at @@ -193,16 +195,16 @@ class SuffixTree : public PPMModel { 
bool GetCut(PNode n) { return GetNode(n).cut == 1; } void SetCut(PNode n, bool v) { GetNode(n).cut = (v ? 1 : 0); } - Count GetEscCount([[maybe_unused]] PNode n) { return param.esc_count; } + Count GetEscCount([[maybe_unused]] PNode n) { return param_.esc_count; } void SetCounts(); - // sets 'cut'=true for unnecessary nodes; physically breaks edges between - // pruned and non-pruned nodes + // sets 'cut'=true for unnecessary nodes_; physically breaks edges between + // pruned and non-pruned nodes_ void Prune(PNode n, bool cut); bool IsValid(PNode n); // is node 'n' useful for prediction? If not, its // children will be cut off - // 'sum' will be set only for not-cut-off nodes + // 'sum' will be set only for not-cut-off nodes_ void SetSums(); int Shift(int c); @@ -216,10 +218,10 @@ class SuffixTree : public PPMModel { //------------------------------------------------------------------------- // PPM compression/decompression - PNode state; + PNode state_; struct Edge { - PNode n; // e.n = NIL means "escape" - Symb s; // this is needed to identify exactly the edges from NIL to ROOT + PNode n; // e.n = NIL_ means "escape" + Symb s; // this is needed to identify exactly the edges from NIL_ to ROOT_ // (otherwise is undefined) }; @@ -229,23 +231,26 @@ class SuffixTree : public PPMModel { CprsErr GetLabel(Edge e, Symb *lbl, int &len); // 'len' - max size of lbl; upon exit: length of lbl int GetLen(Edge e) { - if (e.n == NIL) return 0; + if (e.n == NIL_) + return 0; return GetNode(e.n).len; } void GetRange(PNode stt, Edge e, Range &r); // 'stt' must be the parent of 'e' Count GetTotal(PNode stt) { - if (stt == NIL) return NSymb; + if (stt == NIL_) + return N_Symb_; Node &node = GetNode(stt); - if (node.child == NIL) return param.esc_count; + if (node.child == NIL_) + return param_.esc_count; return GetNode(GetNode(node.child).prev).sum; } void Move(Edge e) { - if (e.n == NIL) - state = GetNode(state).suf; + if (e.n == NIL_) + state_ = GetNode(state_).suf; else - state = 
e.n; + state_ = e.n; } //----------------------------------------------------------------------------- @@ -255,31 +260,31 @@ class SuffixTree : public PPMModel { // The resultant tree is a sum of trees constructed for each of the strings, // with every leaf storing the number of its repetitions. // If dlen_=-1, it's assumed that data_ contains only one string. - SuffixTree(const Symb *data_, int dlen_ = -1) { + SuffixTree(const Symb *data, int dlen = -1) { Init(); if (dlen_ < 0) - dlen = (int)std::strlen((const char *)data_) + 1; + dlen_ = (int)std::strlen((const char *)data) + 1; else - dlen = dlen_; - dlen += 2; // add '\0' at the beginning and at the end - data.reset(new uchar[dlen]); - std::memcpy(data.get() + 1, data_, dlen - 2); - data[0] = data[dlen - 1] = '\0'; + dlen_ = dlen; + dlen_ += 2; // add '\0' at the beginning and at the end + data_.reset(new uchar[dlen_]); + std::memcpy(data_.get() + 1, data, dlen_ - 2); + data_[0] = data_[dlen_ - 1] = '\0'; Create(); } virtual ~SuffixTree() { Clear(); } //------------------------------------------------------------------------- // information and statistics - int GetNNodes() override { return (int)nodes.size(); } - int GetNLeaves() { return nleaves; } + int GetNNodes() override { return (int)nodes_.size(); } + int GetNLeaves() { return nleaves_; } int GetMemUsage() override; // real number of bytes used, without wasted space in 'vector' int GetMemAlloc() override; // total number of bytes used // branch[256] - array of counts of branching factors - // sumcnt[256] - array of summary subtree sizes (counts) for nodes of a given + // sumcnt[256] - array of summary subtree sizes (counts) for nodes_ of a given // branching factor sumvis[256] - array of summary number of visits (invoking - // GetChild) for a branching factor vld - number of valid internal nodes, i.e. + // GetChild) for a branching factor vld - number of valid internal nodes_, i.e. 
// with large count and small branching - good for prediction void // GetStat(int* branch, int* sumcnt, int* sumvis, int& vld, int* cnt = 0, // PNode n = 0, PNode prev = 0); @@ -291,7 +296,7 @@ class SuffixTree : public PPMModel { // flags (OR): // 1 - print label text // 2 - print lengths of labels - // 4 - print depths of nodes + // 4 - print depths of nodes_ // 8 - print counts void Print(std::ostream &str = std::cout, uint flags = 1 + 8, PNode n = 0, PNode prev = 0, int ind = 0); void PrintSub(std::ostream &str, int start, @@ -302,13 +307,13 @@ class SuffixTree : public PPMModel { // definitions for PPM void TransformForPPM(PPMParam param_ = PPMParam()) override; - void InitPPM() override { state = ROOT; } + void InitPPM() override { state_ = ROOT_; } // compression: [str,len_total] -> [len_of_edge,rng,total] void Move(Symb *str, int &len, Range &rng, Count &total) override; // decompression: [c,str,len_max] -> [str,len_of_edge,rng]+returned_error CprsErr Move(Count c, Symb *str, int &len, Range &rng) override; - Count GetTotal() override { return GetTotal(state); } + Count GetTotal() override { return GetTotal(state_); } #ifdef SUFTREE_STAT int GetDep(PNode s) { return GetNode(s).dep; } #endif diff --git a/storage/tianmu/compress/text_compressor.cpp b/storage/tianmu/compress/text_compressor.cpp index 3bcd24867..9f6843d40 100644 --- a/storage/tianmu/compress/text_compressor.cpp +++ b/storage/tianmu/compress/text_compressor.cpp @@ -45,44 +45,44 @@ namespace compress { // buffer // -TextCompressor::TextCompressor() : BLD_START(0), BLD_RATIO(0.0), graph() {} +TextCompressor::TextCompressor() : BLD_START_(0), BLD_RATIO_(0.0), graph_() {} void TextCompressor::SetSplit(int len) { DEBUG_ASSERT(len > 0); - DEBUG_ASSERT(BLD_RATIO > 1.01); - double ratio = len / (double)BLD_START; + DEBUG_ASSERT(BLD_RATIO_ > 1.01); + double ratio = len / (double)BLD_START_; if (ratio < 1.0) ratio = 1.0; - int n = (int)(log(ratio) / log(BLD_RATIO)) + 1; // no. 
of models (rounded downwards) + int n = (int)(log(ratio) / log(BLD_RATIO_)) + 1; // no. of models (rounded downwards) if (n < 1) n = 1; double bld_ratio = 0.0; if (n > 1) bld_ratio = pow(ratio, 1.0 / (n - 1)); // quotient of 2 consecutive splits - split.resize(n + 1); - split[0] = 0; - double next = BLD_START; + split_.resize(n + 1); + split_[0] = 0; + double next = BLD_START_; for (int i = 1; i < n; i++) { - split[i] = (int)next; - DEBUG_ASSERT(split[i] > split[i - 1]); + split_[i] = (int)next; + DEBUG_ASSERT(split_[i] > split_[i - 1]); next *= bld_ratio; } - split[n] = len; - DEBUG_ASSERT(split[n] > split[n - 1]); + split_[n] = len; + DEBUG_ASSERT(split_[n] > split_[n - 1]); } void TextCompressor::SetParams(PPMParam &p, int ver, [[maybe_unused]] int lev, int len) { p.SetDefault(); - BLD_START = 64; + BLD_START_ = 64; switch (ver) { case 1: p.esc_count = 25; - BLD_RATIO = 2.5; + BLD_RATIO_ = 2.5; break; case 2: p.esc_count = 70; - BLD_RATIO = 2.0; + BLD_RATIO_ = 2.0; break; default: TIANMU_ERROR("not implemented"); @@ -145,7 +145,7 @@ CprsErr TextCompressor::CompressPlain(char *dest, int &dlen, char *src, int slen SetParams(param, ver, lev, slen); // leave place in 'dest' for the array of 'dpos' of data parts - int n = (int)split.size() - 1; + int n = (int)split_.size() - 1; int *dpos_tab = (int *)(dest + dpos); dpos += n * sizeof(int); @@ -153,9 +153,9 @@ CprsErr TextCompressor::CompressPlain(char *dest, int &dlen, char *src, int slen // loop: build next PPM model, compress next part of the data for (int i = 0; i < n; i++) { - PPM ppm((uchar *)src, split[i], mt, param); + PPM ppm((uchar *)src, split_[i], mt, param); clen = dlen - dpos; - err = ppm.Compress(dest + dpos, clen, (uchar *)src + split[i], split[i + 1] - split[i]); + err = ppm.Compress(dest + dpos, clen, (uchar *)src + split_[i], split_[i + 1] - split_[i]); if (static_cast(err)) break; dpos_tab[i] = dpos; @@ -189,7 +189,7 @@ CprsErr TextCompressor::DecompressPlain(char *dest, int dlen, char *src, int 
sle PPMParam param; SetParams(param, ver, lev, dlen); - int n = (int)split.size() - 1; + int n = (int)split_.size() - 1; // read array of parts' positions in 'src' std::vector parts; @@ -203,8 +203,8 @@ CprsErr TextCompressor::DecompressPlain(char *dest, int dlen, char *src, int sle // loop: build next PPM model, decompress next part of the data for (int i = 0; i < n; i++) { - PPM ppm((uchar *)dest, split[i], mt, param, (uchar)src[parts[i]]); - err = ppm.Decompress((uchar *)dest + split[i], split[i + 1] - split[i], src + parts[i], parts[i + 1] - parts[i]); + PPM ppm((uchar *)dest, split_[i], mt, param, (uchar)src[parts[i]]); + err = ppm.Decompress((uchar *)dest + split_[i], split_[i + 1] - split_[i], src + parts[i], parts[i + 1] - parts[i]); if (static_cast(err)) return err; } @@ -456,7 +456,8 @@ CprsErr TextCompressor::DecompressZlib(char *dest, int dlen, char *src, int slen CprsErr TextCompressor::Compress(char *dest, int &dlen, char **index, const uint *lens, int nrec, uint &packlen, int ver, int lev) { if ((!dest) || (!index) || (!lens) || (dlen <= 0) || (nrec <= 0)) return CprsErr::CPRS_ERR_PAR; - if ((ver < 0) || (ver > MAXVER) || (lev < 1) || (lev > 9)) return CprsErr::CPRS_ERR_VER; + if ((ver < 0) || (ver > MAXVER_) || (lev < 1) || (lev > 9)) + return CprsErr::CPRS_ERR_VER; int slen = packlen; @@ -492,7 +493,7 @@ CprsErr TextCompressor::Compress(char *dest, int &dlen, char **index, const uint try { RangeCoder coder; coder.InitCompress(dest, dlen, dpos); - graph.Encode(&coder, index, lens, nrec, packlen); + graph_.Encode(&coder, index, lens, nrec, packlen); coder.EndCompress(); dpos = coder.GetPos(); } catch (CprsErr &e) { @@ -547,7 +548,7 @@ CprsErr TextCompressor::Decompress(char *dest, int dlen, char *src, int slen, ch try { RangeCoder coder; coder.InitDecompress(src, slen, spos); - graph.Decode(&coder, index, lens, nrec, dest, dlen); + graph_.Decode(&coder, index, lens, nrec, dest, dlen); } catch (CprsErr &e) { return e; } diff --git 
a/storage/tianmu/compress/text_compressor.h b/storage/tianmu/compress/text_compressor.h index 70c46cfb2..036909c68 100644 --- a/storage/tianmu/compress/text_compressor.h +++ b/storage/tianmu/compress/text_compressor.h @@ -36,35 +36,36 @@ namespace compress { // TODO: ratio - different algorithm for compression of short columns class TextCompressor { - // array of split positions, at which PPM models are built; + // array of split_ positions, at which PPM models are built; // its size is (no_of_models + 1) and is >= 2; - // split[0]=0 and split[size-1]=data_len - std::vector split; + // split_[0]=0 and split_[size-1]=data_len + std::vector split_; - // set splits so that split[i+1]/split[i] is approx. the same for all 'i' and - // is >= BLD_RATIO + // set splits so that split_[i+1]/split_[i] is approx. the same for all 'i' and + // is >= BLD_RATIO_ void SetSplit(int len); // simple permutation of strings to compress; PermNext will loop through // permuted elements 0..nrec-1, beginning from PermFirst() and finishing in // PermFirst() - static const int PERMSTEP = 2048; + static const int PERMSTEP_ = 2048; int PermFirst([[maybe_unused]] int nrec) { return 0; } void PermNext(int &i, int nrec) { - i += PERMSTEP; - if (i >= nrec) i = (i + 1) % PERMSTEP % nrec; + i += PERMSTEP_; + if (i >= nrec) + i = (i + 1) % PERMSTEP_ % nrec; } // parameters of the procedure of building a sequence of models - int BLD_START; // no. of bytes used to build the first model (they must be - // simply copied during compression) - double BLD_RATIO; // no. of bytes used to build the next model is min. - // BLD_RATIO * no_bytes_to_build_previous_model + int BLD_START_; // no. of bytes used to build the first model (they must be + // simply copied during compression) + double BLD_RATIO_; // no. of bytes used to build the next model is min. 
+ // BLD_RATIO_ * no_bytes_to_build_previous_model void SetParams(PPMParam &p, int ver, int lev, int len); // sets 'p' and BLD_...; then invokes SetSplit() - IncWGraph graph; + IncWGraph graph_; CprsErr CompressCopy(char *dest, int &dlen, char *src, int slen); // stores ver=0 at the beginning @@ -78,7 +79,7 @@ class TextCompressor { // representative for the rest) // - and encodes them as null-separated concatenation (each string _begins_ // with '\0') Then it runs CompressSimple. - CprsErr CompressVer2(char *dest, int &dlen, char **index, const uint *lens, int nrec, int ver = VER, int lev = LEV); + CprsErr CompressVer2(char *dest, int &dlen, char **index, const uint *lens, int nrec, int ver = VER_, int lev = LEV_); CprsErr DecompressVer2(char *dest, int dlen, char *src, int slen, char **index, const uint * /*lens*/, int nrec); CprsErr CompressVer4(char *dest, int &dlen, char **index, const uint *lens, int nrec, int ver, int lev, uint packlen); @@ -90,9 +91,9 @@ class TextCompressor { TextCompressor(); ~TextCompressor() = default; - static const int MAXVER = static_cast(common::PackFmt::ZLIB); - static const int VER = 3; - static const int LEV = 7; + static const int MAXVER_ = static_cast(common::PackFmt::ZLIB); + static const int VER_ = 3; + static const int LEV_ = 7; // 'ver' - version of algorithm to use: // 0 - no compression, @@ -118,7 +119,7 @@ class TextCompressor { // (symbol '\0' has special meaning!) Note 2: strings used to build PPM model // are taken simply from the beginning of 'src', // so they can be non-representative for the next strings - CprsErr CompressPlain(char *dest, int &dlen, char *src, int slen, int ver = VER, int lev = LEV); + CprsErr CompressPlain(char *dest, int &dlen, char *src, int slen, int ver = VER_, int lev = LEV_); // 'dlen' - actual size of decompressed data ('slen' from CompressPlain()) // 'slen' - size of compressed data ('dlen' returned from CompressPlain()) @@ -136,8 +137,8 @@ class TextCompressor { // of compressed data. 
packlen - upon exit will hold minimum size of 'dest' // buffer which must be passed to Decompress. Size of compressed data is // ALWAYS <= total_length_of_records + 2. - CprsErr Compress(char *dest, int &dlen, char **index, const uint *lens, int nrec, uint &packlen, int ver = VER, - int lev = LEV); + CprsErr Compress(char *dest, int &dlen, char **index, const uint *lens, int nrec, uint &packlen, int ver = VER_, + int lev = LEV_); // dlen - length of buffer 'dest'; must be >= 'packlen' returned from Compress // (which is <= cumulative length of decoded strings) diff --git a/storage/tianmu/compress/top_bit_dict.cpp b/storage/tianmu/compress/top_bit_dict.cpp index f88c3c0fb..4a9b7a5b7 100644 --- a/storage/tianmu/compress/top_bit_dict.cpp +++ b/storage/tianmu/compress/top_bit_dict.cpp @@ -24,11 +24,12 @@ namespace Tianmu { namespace compress { template -const double TopBitDict::MINPREDICT = 0.97; +const double TopBitDict::MIN_PREDICT_ = 0.97; template uint TopBitDict::FindOptimum(DataSet *dataset, uint nbit, uint &opt_bit, Dictionary *&opt_dict) { - if (nbit <= 2) return INF; + if (nbit <= 2) + return INF_; T *data = dataset->data; uint nrec = dataset->nrec; @@ -36,37 +37,37 @@ uint TopBitDict::FindOptimum(DataSet *dataset, uint nbit, uint &opt_bit, D core::QuickMath math; opt_bit = 0; - opt_dict = &counters[0]; + opt_dict = &counters_[0]; double uni_pred = nrec * math.log2(maxval + 1.0); double opt_pred = uni_pred; - double min_pred = MINPREDICT * uni_pred; + double min_pred = MIN_PREDICT_ * uni_pred; double max_pred = 0.99 * uni_pred; - uint bitstart = (nbit / 2 < BITSTART) ? nbit / 2 : BITSTART; + uint bitstart = (nbit / 2 < BIT_START_) ? nbit / 2 : BIT_START_; uint bit = bitstart; - Dictionary *dict = &counters[1]; + Dictionary *dict = &counters_[1]; double pred; uint maxkey = 1u _SHL_ bitstart; // maximum no. 
of keys in the next loop uint skiprec, opt_skip = 0; - // uint skiprec = (nrec >> bitstart) / KEYOCCUR; // when skipping, each - // possible key occurs KEYOCCUR times on avg. + // uint skiprec = (nrec >> bitstart) / KEY_OCCUR_; // when skipping, each + // possible key occurs KEY_OCCUR_ times on avg. while (bit <= nbit) { - skiprec = nrec / (maxkey * KEYOCCUR); + skiprec = nrec / (maxkey * KEY_OCCUR_); if (!skiprec) skiprec = 1; // insert truncated bits to dictionary (counter) if (!Insert(dict, data, nbit, bit, nrec, skiprec)) break; // make prediction - DEBUG_ASSERT(nrec <= MAXTOTAL); + DEBUG_ASSERT(nrec <= MAX_TOTAL_); double uplen = 0.0; short nkey; auto keys = dict->GetKeys(nkey); for (short i = 0; i < nkey; i++) uplen -= math.nlog2n(keys[i].count); if (skiprec > 1) uplen = skiprec * uplen - nrec * math.log2(skiprec); uplen += math.nlog2n(nrec); - double cntlen = math.log2(MAXTOTAL) - math.log2((uint)nkey); + double cntlen = math.log2(MAX_TOTAL_) - math.log2((uint)nkey); pred = (nbit - bit) * nrec // bits encoded uniformly + uplen // bits encoded with dictionary + (bit + cntlen) * nkey; // dictionary @@ -86,18 +87,20 @@ uint TopBitDict::FindOptimum(DataSet *dataset, uint nbit, uint &opt_bit, D opt_dict = tmp; } } - // skiprec >>= BITSTEP; + // skiprec >>= BIT_STEP_; // if(!skiprec) skiprec = 1; // if((skiprec == 1) && (opt_pred > min_pred)) break; - bit += BITSTEP; - maxkey = (uint)nkey << BITSTEP; // upper bound for the no. of keys in the next loop + bit += BIT_STEP_; + maxkey = (uint)nkey << BIT_STEP_; // upper bound for the no. 
of keys in the next loop } - if (!opt_bit || (opt_pred >= min_pred)) return INF; + if (!opt_bit || (opt_pred >= min_pred)) + return INF_; if (opt_skip > 1) { bool ok = Insert(opt_dict, data, nbit, opt_bit, nrec, 1); // DEBUG_ASSERT(ok); - if (ok == false) return INF; + if (ok == false) + return INF_; // don't recalculate prediction } return (uint)opt_pred + 1; @@ -106,7 +109,7 @@ uint TopBitDict::FindOptimum(DataSet *dataset, uint nbit, uint &opt_bit, D template inline bool TopBitDict::Insert(Dictionary *dict, T *data, uint nbit, uint bit, uint nrec, uint skiprec) { dict->InitInsert(); - if (topbottom == TopBottom::tbTop) { // top bits + if (top_bottom_ == TopBottom::tbTop) { // top bits uchar bitlow = (uchar)(nbit - bit); DEBUG_ASSERT(bitlow < sizeof(T) * 8); for (uint i = 0; i < nrec; i += skiprec) @@ -138,26 +141,26 @@ bool TopBitDict::Encode(RangeCoder *coder, DataSet *dataset) { coder->EncodeUniform((uchar)0, (uchar)7); // save no. of lower bits - bitlow = (topbottom == TopBottom::tbTop) ? (T)(nbit - bitdict) : (T)bitdict; - coder->EncodeUniform(bitlow, (T)64); + bitlow_ = (top_bottom_ == TopBottom::tbTop) ? (T)(nbit - bitdict) : (T)bitdict; + coder->EncodeUniform(bitlow_, (T)64); // save dictionary - DEBUG_ASSERT(bitlow < sizeof(maxval) * 8); - T maxhigh = maxval >> bitlow, maxlow = ((T)1 _SHL_ bitlow) - (T)1; - T dictmax = (topbottom == TopBottom::tbTop) ? maxhigh : maxlow; + DEBUG_ASSERT(bitlow_ < sizeof(maxval) * 8); + T maxhigh = maxval >> bitlow_, maxlow = ((T)1 _SHL_ bitlow_) - (T)1; + T dictmax = (top_bottom_ == TopBottom::tbTop) ? 
maxhigh : maxlow; dict->Save(coder, dictmax); IFSTAT(uint pos1 = coder->GetPos()); - IFSTAT(codesize[0] = pos1 - pos0); + IFSTAT(codesize_[0] = pos1 - pos0); T *data = dataset->data; uint nrec = dataset->nrec; bool esc; // encode data - DEBUG_ASSERT(bitlow < sizeof(T) * 8); - if (topbottom == TopBottom::tbTop) + DEBUG_ASSERT(bitlow_ < sizeof(T) * 8); + if (top_bottom_ == TopBottom::tbTop) for (uint i = 0; i < nrec; i++) { - esc = dict->Encode(coder, data[i] >> bitlow); + esc = dict->Encode(coder, data[i] >> bitlow_); ASSERT(!esc, "TOP encode failed"); data[i] &= maxlow; } @@ -165,11 +168,11 @@ bool TopBitDict::Encode(RangeCoder *coder, DataSet *dataset) { for (uint i = 0; i < nrec; i++) { esc = dict->Encode(coder, data[i] & maxlow); ASSERT(!esc, "BOTTOM encode failed"); - data[i] >>= bitlow; + data[i] >>= bitlow_; } - IFSTAT(codesize[1] = coder->GetPos() - pos1); - dataset->maxval = (topbottom == TopBottom::tbTop) ? maxlow : maxhigh; + IFSTAT(codesize_[1] = coder->GetPos() - pos1); + dataset->maxval = (top_bottom_ == TopBottom::tbTop) ? maxlow : maxhigh; return true; } @@ -181,37 +184,37 @@ void TopBitDict::Decode(RangeCoder *coder, DataSet *dataset) { if (ver > 0) throw CprsErr::CPRS_ERR_COR; // read no. of lower bits - coder->DecodeUniform(bitlow, (T)64); + coder->DecodeUniform(bitlow_, (T)64); // load dictionary - Dictionary *dict = counters; - DEBUG_ASSERT(bitlow < sizeof(dataset->maxval) * 8); - T maxhigh = dataset->maxval >> bitlow, maxlow = ((T)1 _SHL_ bitlow) - (T)1; - T dictmax = (topbottom == TopBottom::tbTop) ? maxhigh : maxlow; + Dictionary *dict = counters_; + DEBUG_ASSERT(bitlow_ < sizeof(dataset->maxval) * 8); + T maxhigh = dataset->maxval >> bitlow_, maxlow = ((T)1 _SHL_ bitlow_) - (T)1; + T dictmax = (top_bottom_ == TopBottom::tbTop) ? 
maxhigh : maxlow; dict->Load(coder, dictmax); // decode data bool esc; uint nrec = dataset->nrec; for (uint i = 0; i < nrec; i++) { - esc = dict->Decode(coder, decoded[i]); + esc = dict->Decode(coder, decoded_[i]); ASSERT(!esc, "decode failed"); } - maxval_merge = dataset->maxval; - dataset->maxval = (topbottom == TopBottom::tbTop) ? maxlow : maxhigh; + maxval_merge_ = dataset->maxval; + dataset->maxval = (top_bottom_ == TopBottom::tbTop) ? maxlow : maxhigh; } template void TopBitDict::Merge(DataSet *dataset) { T *data = dataset->data; uint nrec = dataset->nrec; - DEBUG_ASSERT(bitlow < sizeof(T) * 8); - if (topbottom == TopBottom::tbTop) - for (uint i = 0; i < nrec; i++) data[i] |= decoded[i] << bitlow; + DEBUG_ASSERT(bitlow_ < sizeof(T) * 8); + if (top_bottom_ == TopBottom::tbTop) + for (uint i = 0; i < nrec; i++) data[i] |= decoded_[i] << bitlow_; else - for (uint i = 0; i < nrec; i++) (data[i] <<= bitlow) |= decoded[i]; - dataset->maxval = maxval_merge; // recover original maxval + for (uint i = 0; i < nrec; i++) (data[i] <<= bitlow_) |= decoded_[i]; + dataset->maxval = maxval_merge_; // recover original maxval } //------------------------------------------------------------------------------------- diff --git a/storage/tianmu/compress/top_bit_dict.h b/storage/tianmu/compress/top_bit_dict.h index 2ab3b4066..946a54927 100644 --- a/storage/tianmu/compress/top_bit_dict.h +++ b/storage/tianmu/compress/top_bit_dict.h @@ -29,16 +29,16 @@ namespace compress { template class TopBitDict : public DataFilt { public: - static const uint MAXTOTAL = RangeCoder::MAX_TOTAL; - static const uint MAXLEN = DICMAP_MAX; - static const uint BITSTART = 7; - static const uint BITSTEP = 2; - static const uint KEYOCCUR = 8; - static const double MINPREDICT; + static const uint MAX_TOTAL_ = RangeCoder::MAX_TOTAL_; + static const uint MAX_LEN_ = DICMAP_MAX; + static const uint BIT_START_ = 7; + static const uint BIT_STEP_ = 2; + static const uint KEY_OCCUR_ = 8; + static const double 
MIN_PREDICT_; enum class TopBottom { tbTop, tbBottom }; // which part of bits is compressed private: - const TopBottom topbottom; + const TopBottom top_bottom_; // uint* levels[2]; // for temporary use in FindOptimum() // T* datasort; @@ -51,25 +51,25 @@ class TopBitDict : public DataFilt { //}; // uint total; - Dictionary counters[2]; - T bitlow; // no. of bits in lower part + Dictionary counters_[2]; + T bitlow_; // no. of bits in lower part // for merging data during decompression - T decoded[MAXLEN]; // decoded 'highs' of values + T decoded_[MAX_LEN_]; // decoded_ 'highs' of values - T maxval_merge; + T maxval_merge_; // Finds optimum no. of bits to store in the dictionary. uint FindOptimum(DataSet *dataset, uint nbit, uint &opt_bit, Dictionary *&opt_dict); bool Insert(Dictionary *dict, T *data, uint nbit, uint bit, uint nrec, uint skiprec); - static const uint INF = UINT_MAX; + static const uint INF_ = UINT_MAX; - virtual void LogCompress(FILE *f) { std::fprintf(f, "%u %u", this->codesize[0], this->codesize[1]); } + virtual void LogCompress(FILE *f) { std::fprintf(f, "%u %u", this->codesize_[0], this->codesize_[1]); } public: - TopBitDict(bool top) : topbottom(top ? TopBottom::tbTop : TopBottom::tbBottom) {} + TopBitDict(bool top) : top_bottom_(top ? TopBottom::tbTop : TopBottom::tbBottom) {} virtual ~TopBitDict() = default; - char const *GetName() override { return topbottom == TopBottom::tbTop ? (char *)"top" : (char *)"low"; } + char const *GetName() override { return top_bottom_ == TopBottom::tbTop ? 
(char *)"top" : (char *)"low"; } bool Encode(RangeCoder *coder, DataSet *dataset) override; void Decode(RangeCoder *coder, DataSet *dataset) override; void Merge(DataSet *dataset) override; diff --git a/storage/tianmu/compress/word_graph.cpp b/storage/tianmu/compress/word_graph.cpp index b042882e7..a3f80776c 100644 --- a/storage/tianmu/compress/word_graph.cpp +++ b/storage/tianmu/compress/word_graph.cpp @@ -29,56 +29,56 @@ WordGraph::WordGraph(const Symb *data_, int dlen_, bool insatend_) { Init(); if (dlen_ < 0) - dlen = (int)std::strlen((const char *)data_) + 1; + dlen_ = (int)std::strlen((const char *)data_) + 1; else - dlen = dlen_; - DEBUG_ASSERT(dlen > 0); + dlen_ = dlen_; + DEBUG_ASSERT(dlen_ > 0); - data = data_; // the data are NOT copied! - insatend = insatend_; + data_ = data_; // the data_ are NOT copied! + insatend_ = insatend_; Create(); } void WordGraph::Init() { - data = NULL; - dlen = -1; - // NIL = 0; ROOT = 1; + data_ = NULL; + dlen_ = -1; + // NIL_ = 0; ROOT = 1; // nleaves = 0; - nodes.reserve(VECT_INIT); - nodes.resize(1); // nodes[0] - NIL + nodes_.reserve(VECT_INIT); + nodes_.resize(1); // nodes_[0] - NIL_ - edges.reserve(VECT_INIT); - edges.resize(1); // edges[0] - ENIL (it is not used, by the space is left for safety) + edges_.reserve(VECT_INIT); + edges_.resize(1); // edges_[0] - ENIL_ (it is not used, by the space is left for safety) - finals.reserve(VECT_INIT); + finals_.reserve(VECT_INIT); #ifdef SHADOW_EDGES - sedges.reserve(VECT_INIT); - sedges.resize(1); -// std::memset(shlen, 0, NSymb*sizeof(*shlen)); + sedges_.reserve(VECT_INIT); + sedges_.resize(1); +// std::memset(shlen_, 0, N_Symb_*sizeof(*shlen_)); #endif // # ifdef SUFTREE_STAT - // nodes[NIL].dep = -1; + // nodes_[NIL_].dep = -1; // # endif // iscreating = false; // for(int i = 0; i < 256; i++) - // nodes[NIL].child[i] = ROOT; + // nodes_[NIL_].child[i] = ROOT; // lens = NULL; } void WordGraph::Create() { // make root node - nodes.resize(2); - Node &root = GN(ROOT1); + 
nodes_.resize(2); + Node &root = GN(ROOT1_); root = Node(); - root.suf = NIL; - root.edge = ENIL; + root.suf = NIL_; + root.edge = ENIL_; - // create edges from NIL to ROOT, for every possible symbol as a label - for (int fsym = 0; fsym < NSymb; fsym++) AddEdge(NIL, (Symb)fsym, 1, true, ROOT1); + // create edges_ from NIL_ to ROOT, for every possible symbol as a label + for (int fsym = 0; fsym < N_Symb_; fsym++) AddEdge(NIL_, (Symb)fsym, 1, true, ROOT1_); - const uchar *s = data, *dstop = data + dlen, *sstop; + const uchar *s = data_, *dstop = data_ + dlen_, *sstop; while (s != dstop) { // set 'sstop' = beginning of the next string; note that the last string // doesn't need to be null-terminated! @@ -87,26 +87,26 @@ void WordGraph::Create() { if (sstop != dstop) sstop++; // strings inserted into the graph will include '\0' at the end // sstop = s + std::strlen((char*)s) + 1; - Point active = {ROOT1, 0, ENIL}; // active point - PNode last = NIL; // recently created node; stored for initialization of - // its 'suf' link - PNode final = NIL; // final node for the current string + Point active = {ROOT1_, 0, ENIL_}; // active point + PNode last = NIL_; // recently created node; stored for initialization of + // its 'suf' link + PNode final = NIL_; // final node for the current string while ((s != sstop) || (active.proj > 0)) { // only if active point is at node, it's worth invoking MoveDown() if (active.proj == 0) MoveDown(active, s, sstop); - Insert(active, (int)(s - data), (int)(sstop - data), last, final); + Insert(active, (int)(s - data_), (int)(sstop - data_), last, final); MoveSuffix(active, s, true); // after following a suffix link, we reached a node, so we already know // how to initialize the suf link of the recently created node - if ((last != NIL) && (active.proj == 0)) { + if ((last != NIL_) && (active.proj == 0)) { GN(last).suf = active.n; - last = NIL; + last = NIL_; } } - DEBUG_ASSERT(last == NIL); + DEBUG_ASSERT(last == NIL_); } } @@ -115,9 +115,9 @@ 
WordGraph::PNode WordGraph::NewFinal(int endpos) { Node &final = GN(pfinal); final.endpos = endpos; final.stop = 1; - final.suf = NIL; - final.edge = ENIL; - finals.push_back(pfinal); + final.suf = NIL_; + final.edge = ENIL_; + finals_.push_back(pfinal); return pfinal; } @@ -132,14 +132,15 @@ void WordGraph::MoveDown(Point &p, const uchar *&s, const uchar *sstop) { // read the first symbol of the next edge if (s == sstop) return; p.edge = FindEdge(p.n, *s); - if (p.edge == ENIL) return; + if (p.edge == ENIL_) + return; proj = 1; s++; // read all next symbols of the edge Edge &e = GE(p.edge); len = e.GetLen(); - lbl = data + GN(e.n).endpos - len; // text of the edge label + lbl = data_ + GN(e.n).endpos - len; // text of the edge label while ((proj < len) && (s != sstop) && (*s == lbl[proj])) { proj++; s++; @@ -147,7 +148,7 @@ void WordGraph::MoveDown(Point &p, const uchar *&s, const uchar *sstop) { if (proj < len) return; // check whether the passed edge was non-solid - in that case the target - // node must be duplicated, part of incoming edges redirected and active + // node must be duplicated, part of incoming edges_ redirected and active // point 'p' accordingly updated if (!e.IsSolid()) Duplicate(p); @@ -166,26 +167,26 @@ void WordGraph::Duplicate(Point &p) { // set fields n2.endpos = n1.endpos; // n2.stop = 0; - // n2.edge = ENIL; // should be 0 after construction + // n2.edge = ENIL_; // should be 0 after construction n2.suf = n1.suf; n1.suf = pn2; - // copy outgoing edges - PEdge pe2, prev = ENIL; - for (PEdge pe1 = n1.edge; pe1 != ENIL; pe1 = GE(pe1).next) { + // copy outgoing edges_ + PEdge pe2, prev = ENIL_; + for (PEdge pe1 = n1.edge; pe1 != ENIL_; pe1 = GE(pe1).next) { pe2 = NewEdge(); CopyEdge(pe1, pe2); GE(pe2).SetSolid(false); - if (prev == ENIL) + if (prev == ENIL_) n2.edge = pe2; else GE(prev).next = pe2; prev = pe2; } - // redirect incoming edges + // redirect incoming edges_ GE(p.edge).SetSolid(true); - const Symb *s = data + n1.endpos; + const 
Symb *s = data_ + n1.endpos; Point q = p; do { DEBUG_ASSERT(q.proj == GE(q.edge).GetLen()); @@ -203,7 +204,7 @@ void WordGraph::MoveSuffix(Point &p, const uchar *s, bool canonlast) { uint len; while (p.proj > 0) { e = FindEdge(p.n, *(s - p.proj)); - DEBUG_ASSERT(e != ENIL); + DEBUG_ASSERT(e != ENIL_); len = GE(e).GetLen(); if ((p.proj < len) || (!canonlast && (p.proj == len))) { @@ -216,40 +217,45 @@ void WordGraph::MoveSuffix(Point &p, const uchar *s, bool canonlast) { } void WordGraph::Insert(Point p, int pos, int endpos, PNode &last, PNode &final) { - uchar s = data[pos]; - bool isfirst = (final == NIL); + uchar s = data_[pos]; + bool isfirst = (final == NIL_); // 'p' is at a node, fully canonized -> add leaf below the node if (p.proj == 0) { - if (last != NIL) GN(last).suf = p.n; - last = NIL; + if (last != NIL_) + GN(last).suf = p.n; + last = NIL_; if (pos < endpos) { - if (final == NIL) final = NewFinal(endpos); + if (final == NIL_) + final = NewFinal(endpos); AddEdge(p.n, s, endpos - pos, isfirst, final); } else { // GN(p.n).stop ++; - if (final == NIL) { + if (final == NIL_) { final = p.n; - if (GN(final).stop++ == 0) finals.push_back(final); - } else if (GN(final).suf == NIL) + if (GN(final).stop++ == 0) + finals_.push_back(final); + } else if (GN(final).suf == NIL_) GN(final).suf = p.n; } } // 'p' is on a solid edge or there was no node recently created -> split the // edge, add new node - else if (GE(p.edge).IsSolid() || (last == NIL)) { + else if (GE(p.edge).IsSolid() || (last == NIL_)) { PNode pn = NewNode(); // edge from 'n' to 'final' if (pos < endpos) { - if (final == NIL) final = NewFinal(endpos); // caution: new node is added + if (final == NIL_) + final = NewFinal(endpos); // caution: new node is added AddEdge(pn, s, endpos - pos, isfirst, final); - } else if (final == NIL) { + } else if (final == NIL_) { final = pn; - if (GN(final).stop++ == 0) finals.push_back(final); - } else if (GN(final).suf == NIL) + if (GN(final).stop++ == 0) + 
finals_.push_back(final); + } else if (GN(final).suf == NIL_) GN(final).suf = pn; // edge from 'n' to 'next' @@ -258,7 +264,7 @@ void WordGraph::Insert(Point p, int pos, int endpos, PNode &last, PNode &final) // added! (array can be reallocated) Node &next = GN(GE(pe).n); n.endpos = next.endpos - GE(pe).GetLen() + p.proj; - AddEdge(pn, data[n.endpos], next.endpos - n.endpos, GE(pe).IsSolid(), GE(pe).n); + AddEdge(pn, data_[n.endpos], next.endpos - n.endpos, GE(pe).IsSolid(), GE(pe).n); // edge from 'prev' to 'n' Edge &e = GE(pe); @@ -267,7 +273,8 @@ void WordGraph::Insert(Point p, int pos, int endpos, PNode &last, PNode &final) e.SetSolid(true); // now we can initialize the 'suf' link of the previously created node - if (last != NIL) GN(last).suf = pn; + if (last != NIL_) + GN(last).suf = pn; last = pn; } @@ -281,29 +288,29 @@ void WordGraph::Insert(Point p, int pos, int endpos, PNode &last, PNode &final) } void WordGraph::Clear() { - finals.clear(); - edges.clear(); - nodes.clear(); - data = NULL; + finals_.clear(); + edges_.clear(); + nodes_.clear(); + data_ = NULL; } //------------------------------------------------------------------------ int WordGraph::GetMemUsage() { - size_t x = nodes.size() * sizeof(Node); - x += edges.size() * sizeof(Edge); - x += finals.size() * sizeof(PNode); - x += sedges.size() * sizeof(SEdge); - // if(data) x += dlen * sizeof(*data); + size_t x = nodes_.size() * sizeof(Node); + x += edges_.size() * sizeof(Edge); + x += finals_.size() * sizeof(PNode); + x += sedges_.size() * sizeof(SEdge); + // if(data_) x += dlen_ * sizeof(*data_); return (int)x; } int WordGraph::GetMemAlloc() { - size_t x = nodes.capacity() * sizeof(Node); - x += edges.capacity() * sizeof(Edge); - x += finals.capacity() * sizeof(PNode); - x += sedges.capacity() * sizeof(SEdge); - // if(data) x += dlen * sizeof(*data); + size_t x = nodes_.capacity() * sizeof(Node); + x += edges_.capacity() * sizeof(Edge); + x += finals_.capacity() * sizeof(PNode); + x += 
sedges_.capacity() * sizeof(SEdge); + // if(data_) x += dlen_ * sizeof(*data_); return (int)x; } @@ -313,7 +320,7 @@ void WordGraph::PrintStat([[maybe_unused]] FILE *f) { std::vector stack; stack.reserve(STACK_INIT); - stack.push_back(ROOT1); + stack.push_back(ROOT1_); PNode pn; PEdge pe; while(!stack.empty()) { @@ -322,7 +329,7 @@ void WordGraph::PrintStat([[maybe_unused]] FILE *f) { Node& n = GN(pn); int nchild = 0; pe = n.edge; - while(pe != ENIL) { + while(pe != ENIL_) { nchild++; stack.push_back(GE(pe).n); pe = GE(pe).next; @@ -335,11 +342,11 @@ void WordGraph::PrintStat([[maybe_unused]] FILE *f) { void WordGraph::PrintLbl(std::ostream &str, Edge &e) { int stop = GN(e.n).endpos; char c; - for (int i = stop - e.GetLen(); i < stop; i++) str << (((c = (char)data[i]) > 13) ? c : '#'); + for (int i = stop - e.GetLen(); i < stop; i++) str << (((c = (char)data_[i]) > 13) ? c : '#'); } void WordGraph::Print(std::ostream &str, [[maybe_unused]] uint flags) { - for (int n = ROOT1; n < (int)nodes.size(); n++) { + for (int n = ROOT1_; n < (int)nodes_.size(); n++) { Node &nd = GN(n); str << n << ": "; str << " endpos = " << nd.endpos; @@ -348,7 +355,7 @@ void WordGraph::Print(std::ostream &str, [[maybe_unused]] uint flags) { str << " count = " << nd.count; str << " total = " << nd.total; - for (PEdge e = nd.edge; e != NIL; e = GE(e).next) { + for (PEdge e = nd.edge; e != NIL_; e = GE(e).next) { Edge &ed = GE(e); str << "\n " << n << " -> " << ed.n << ": "; str << "\""; @@ -366,17 +373,17 @@ void WordGraph::Print(std::ostream &str, [[maybe_unused]] uint flags) { //------------------------------------------------------------------------ void WordGraph::TransformForPPM(PPMParam param_) { - param = param_; + param_ = param_; - // propagate 'stop' numbers from final nodes to all border-path nodes + // propagate 'stop' numbers from final nodes_ to all border-path nodes_ PropagateStop(); - // compute counts of all nodes (not only on the border path) + // compute counts of all 
nodes_ (not only on the border path) SetCounts(); // SortEdges(GN(ROOT)); - // set cumulative counts of nodes - field 'sum'; - // create shadow edges, which are used after "escape" transition during + // set cumulative counts of nodes_ - field 'sum'; + // create shadow edges_, which are used after "escape" transition during // compress SetSums(); } @@ -385,20 +392,20 @@ void WordGraph::PropagateStop() { // propagate 'stop' through the suffix path, temporarily accumulating 'stop' // values in 'count' fields (this is necessary because some final node may be // a suffix of another final node) - for (uint i = 0; i < finals.size(); i++) { - int s = GN(finals[i]).stop; - PNode n = finals[i]; + for (uint i = 0; i < finals_.size(); i++) { + int s = GN(finals_[i]).stop; + PNode n = finals_[i]; - while (n != ROOT1) { + while (n != ROOT1_) { GN(n).count += s; n = GN(n).suf; } } // move values from 'count' to 'stop' - for (uint i = 0; i < finals.size(); i++) { - PNode n = finals[i]; - while (n != ROOT1) { + for (uint i = 0; i < finals_.size(); i++) { + PNode n = finals_[i]; + while (n != ROOT1_) { Node &nd = GN(n); if (nd.count == 0) break; nd.stop = nd.count; @@ -428,7 +435,7 @@ void WordGraph::SetCounts() { if (proc) { // children are already processed (their counts are correct) count = node.stop; - while (e != NIL) { + while (e != NIL_) { count += GN(GE(e).n).count; e = NxtEdge(e); } @@ -436,7 +443,7 @@ void WordGraph::SetCounts() { } else if (count == 0) { // insert children onto the stack stack.push_back(StackElem(n, true)); int nedges = 0; - while (e != NIL) { + while (e != NIL_) { stack.push_back(StackElem(GE(e).n, false)); e = NxtEdge(e); nedges++; @@ -466,18 +473,18 @@ void WordGraph::SetCounts() { //} void WordGraph::SetSums() { - // set sums of edges from NIL to ROOT + // set sums of edges_ from NIL_ to ROOT PNode pn; PEdge pe; int s = 0; - pe = GN(NIL).edge; - while (pe != ENIL) { + pe = GN(NIL_).edge; + while (pe != ENIL_) { GE(pe).sum = s++; pe = GE(pe).next; } - 
GN(NIL).total = s; + GN(NIL_).total = s; - std::vector stack; // holds nodes which outgoing edges' sums must be set + std::vector stack; // holds nodes_ which outgoing edges_' sums must be set stack.reserve(STACK_INIT); // TODO:uncomment ROOT // stack.push_back(ROOT); @@ -502,7 +509,7 @@ void WordGraph::SetSums() { #endif pe = node.edge; - while (pe != ENIL) { + while (pe != ENIL_) { Edge &e = GE(pe); e.sum = sum; // 'e.sum' is the lower bound @@ -530,46 +537,48 @@ void WordGraph::SetShadow([[maybe_unused]] PNode pn) { #ifdef SHADOW_EDGES Node &n1 = GN(pn); PNode pe = n1.edge; - if (pe == ENIL) return; // don't make shadow edges for leaves + if (pe == ENIL_) + return; // don't make shadow edges_ for leaves - // fill in the 'shlen' array of edge lengths - DEBUG_ASSERT(sizeof(shlen) == NSymb * sizeof(*shlen)); - std::memset(shlen, 0, sizeof(shlen)); // isn't it faster to clear only the - // used cells of 'shlen' at the end? + // fill in the 'shlen_' array of edge lengths + DEBUG_ASSERT(sizeof(shlen_) == N_Symb_ * sizeof(*shlen_)); + std::memset(shlen_, 0, sizeof(shlen_)); // isn't it faster to clear only the + // used cells of 'shlen_' at the end? 
do { Edge &e = GE(pe); - shlen[e.fsym] = e.GetLen(); + shlen_[e.fsym] = e.GetLen(); pe = e.next; - } while (pe != ENIL); + } while (pe != ENIL_); - // compute total count of shadow edges (not including ESC) + // compute total count of shadow edges_ (not including ESC) // TODO: try to do it approximately but faster Node &n2 = GN(n1.suf); pe = n2.edge; - DEBUG_ASSERT(pe != ENIL); + DEBUG_ASSERT(pe != ENIL_); int totcnt = 0; do { Edge &e = GE(pe); - if (shlen[e.fsym] != e.GetLen()) totcnt += GN(e.n).count; + if (shlen_[e.fsym] != e.GetLen()) + totcnt += GN(e.n).count; pe = e.next; - } while (pe != ENIL); + } while (pe != ENIL_); - n1.sedge = sedges.size(); + n1.sedge = sedges_.size(); // int shift = Shift(n2.count - n1.count); // this is an upper // approximation of the shift; it could be lower int shift = Shift(totcnt); int cnt, scnt; Count &stotal = n1.stotal = GetShEscCount(pn); - // loop through edges of suffix-link node and create shadow edges when + // loop through edges_ of suffix-link node and create shadow edges_ when // necessary; set shadow sums and total count pe = n2.edge; do { Edge &e = GE(pe); - if (shlen[e.fsym] != e.GetLen()) { - DEBUG_ASSERT((shlen[e.fsym] == 0) || (shlen[e.fsym] > e.GetLen())); - sedges.resize(n1.sedge + ++n1.nshadow); - SEdge &se = sedges.back(); + if (shlen_[e.fsym] != e.GetLen()) { + DEBUG_ASSERT((shlen_[e.fsym] == 0) || (shlen_[e.fsym] > e.GetLen())); + sedges_.resize(n1.sedge + ++n1.nshadow); + SEdge &se = sedges_.back(); se.fsym = e.fsym; se.sum = stotal; @@ -583,7 +592,7 @@ void WordGraph::SetShadow([[maybe_unused]] PNode pn) { stotal += (Count)scnt; } pe = e.next; - } while (pe != ENIL); + } while (pe != ENIL_); #endif } @@ -591,9 +600,9 @@ void WordGraph::SetShadow([[maybe_unused]] PNode pn) { int WordGraph::Shift(int c) { int s = 0; #ifdef SHIFT_ESC - int limit = COUNT_MAX / 4 - NSymb - 1; + int limit = COUNT_MAX / 4 - N_Symb_ - 1; #else - int limit = COUNT_MAX / 4 - NSymb - MAX_ESC_COUNT; + int limit = COUNT_MAX / 4 - 
N_Symb_ - MAX_ESC_COUNT; #endif while ((c _SHR_ s) > limit) s++; return s; @@ -606,36 +615,37 @@ int WordGraph::Shift(int c) { void WordGraph::FindEdge(PEdge &e, PSEdge &se, Symb *str, int len) { // find the edge beginning with symbol str[0] DEBUG_ASSERT(len > 0); - if (state.prev == NIL) { // the last transition was forward - se = SENIL; - e = FindEdge(state.n, str[0]); - if (e == ENIL) return; + if (state_.prev == NIL_) { // the last transition was forward + se = SENIL_; + e = FindEdge(state_.n, str[0]); + if (e == ENIL_) + return; } else { // the last transition was ESC - se = FindSEdge(state.prev, str[0]); - if (se == SENIL) { - e = ENIL; + se = FindSEdge(state_.prev, str[0]); + if (se == SENIL_) { + e = ENIL_; return; } - e = FindEdge(state.n, str[0]); + e = FindEdge(state_.n, str[0]); } - DEBUG_ASSERT(e != ENIL); + DEBUG_ASSERT(e != ENIL_); // check if the rest of the edge label is the same as 'str' PNode n = GE(e).n; int elen = GE(e).GetLen(); - if ((elen > len) || (std::memcmp(str + 1, data + GN(n).endpos - elen + 1, elen - 1) != 0)) { - e = ENIL; - se = SENIL; + if ((elen > len) || (std::memcmp(str + 1, data_ + GN(n).endpos - elen + 1, elen - 1) != 0)) { + e = ENIL_; + se = SENIL_; } } void WordGraph::FindEdge(PEdge &e, PSEdge &se, Count c) { // find the edge with count 'c'; linear search - if (state.prev == NIL) { // the last transition was forward - e = ENIL; - se = SENIL; - PEdge next = GN(state.n).edge; - while ((next != ENIL) && (c >= GE(next).sum)) { + if (state_.prev == NIL_) { // the last transition was forward + e = ENIL_; + se = SENIL_; + PEdge next = GN(state_.n).edge; + while ((next != ENIL_) && (c >= GE(next).sum)) { e = next; next = GE(e).next; } @@ -643,16 +653,16 @@ void WordGraph::FindEdge(PEdge &e, PSEdge &se, Count c) { #ifdef SHADOW_EDGES // find a shadow edge 'se' - Node &n = GN(state.prev); + Node &n = GN(state_.prev); se = n.sedge; PSEdge stop = se + n.nshadow; while ((se != stop) && (c >= GSE(se).sum)) se++; if (--se < n.sedge) { - 
se = SENIL; - e = ENIL; + se = SENIL_; + e = ENIL_; } else { - e = FindEdge(state.n, GSE(se).fsym); - DEBUG_ASSERT(e != ENIL); + e = FindEdge(state_.n, GSE(se).fsym); + DEBUG_ASSERT(e != ENIL_); } #else @@ -662,7 +672,7 @@ void WordGraph::FindEdge(PEdge &e, PSEdge &se, Count c) { } CprsErr WordGraph::GetLabel(PEdge e, [[maybe_unused]] Symb *lbl, int &len) { - if (e == ENIL) { + if (e == ENIL_) { len = 0; return CprsErr::CPRS_SUCCESS; } // "escape" @@ -672,30 +682,30 @@ CprsErr WordGraph::GetLabel(PEdge e, [[maybe_unused]] Symb *lbl, int &len) { len = (int)edge.GetLen(); // TODO:uncomment ROOT - // if(edge.n == ROOT) lbl[0] = edge.fsym; // edges from - // NIL consider separately - // else std::memcpy(lbl, data + GN(edge.n).endpos - len, len); + // if(edge.n == ROOT) lbl[0] = edge.fsym; // edges_ from + // NIL_ consider separately + // else std::memcpy(lbl, data_ + GN(edge.n).endpos - len, len); return CprsErr::CPRS_SUCCESS; } void WordGraph::GetRange(PEdge e, [[maybe_unused]] PSEdge se, Range &r) { r.low = 0; - if (state.prev == NIL) { // the last transition was forward + if (state_.prev == NIL_) { // the last transition was forward PEdge next; - Node &n = GN(state.n); - if (e == ENIL) + Node &n = GN(state_.n); + if (e == ENIL_) next = n.edge; else { r.low = GE(e).sum; next = GE(e).next; } - r.high = (next == ENIL ? n.total : GE(next).sum); + r.high = (next == ENIL_ ? 
n.total : GE(next).sum); } else { // the last transition was ESC #ifdef SHADOW_EDGES PSEdge next; - Node &n = GN(state.prev); - if (se == SENIL) + Node &n = GN(state_.prev); + if (se == SENIL_) next = n.sedge; else { r.low = GSE(se).sum; @@ -711,58 +721,59 @@ void WordGraph::GetRange(PEdge e, [[maybe_unused]] PSEdge se, Range &r) { } void WordGraph::Move(PEdge e) { - if (e == ENIL) { - state.lastesc = true; - if (NoShadow(state.n)) - state.prev = NIL; + if (e == ENIL_) { + state_.lastesc = true; + if (NoShadow(state_.n)) + state_.prev = NIL_; else - state.prev = state.n; - state.n = GN(state.n).suf; + state_.prev = state_.n; + state_.n = GN(state_.n).suf; } else { - state.lastesc = false; - state.prev = NIL; - state.n = GE(e).n; + state_.lastesc = false; + state_.prev = NIL_; + state_.n = GE(e).n; } // # ifndef SHADOW_EDGES - // state.prev = NIL; + // state_.prev = NIL_; // # endif } void WordGraph::MakeLog(PNode stt, PEdge e) { - if (logfile == NULL) return; - // if(e != ENIL) { - // std::fprintf(logfile, "F\n"); // forward transition + if (log_file_ == NULL) + return; + // if(e != ENIL_) { + // std::fprintf(log_file_, "F\n"); // forward transition // return; //} // compute no. 
of transitions from n1 and n2 Node &n1 = GN(stt); int t1 = 0; - for (PEdge e1 = n1.edge; e1 != ENIL; e1 = GE(e1).next) t1++; - // for(e = n2.edge; e != ENIL; e = GE(e).next) t2++; + for (PEdge e1 = n1.edge; e1 != ENIL_; e1 = GE(e1).next) t1++; + // for(e = n2.edge; e != ENIL_; e = GE(e).next) t2++; - std::fprintf(logfile, "%d %d %d %d\n", (int)state.lastesc, (int)(e == ENIL), t1, n1.count - n1.stop); - // std::fprintf(logfile, "E %d:%d %d:%d\n", t1, t2, n1.count, n2.count); + std::fprintf(log_file_, "%d %d %d %d\n", (int)state_.lastesc, (int)(e == ENIL_), t1, n1.count - n1.stop); + // std::fprintf(log_file_, "E %d:%d %d:%d\n", t1, t2, n1.count, n2.count); } void WordGraph::InitPPM() { - state.lastesc = false; - state.prev = NIL; + state_.lastesc = false; + state_.prev = NIL_; // TODO:uncomment ROOT - // state.n = ROOT; + // state_.n = ROOT; } void WordGraph::Move(Symb *str, int &len, Range &rng, Count &total) { PEdge e; PSEdge se; FindEdge(e, se, str, len); - len = (e == ENIL ? 0 : GE(e).GetLen()); + len = (e == ENIL_ ? 
0 : GE(e).GetLen()); GetRange(e, se, rng); total = GetTotal_(); DEBUG_ASSERT(rng.high <= total); #ifdef MAKELOG - MakeLog(state.n, e); + MakeLog(state_.n, e); #endif Move(e); @@ -798,12 +809,12 @@ CprsErr WordGraph::Move(Count c, Symb *str, int &len, Range &rng) { WordGraph::PEdge WordGraph::FindEdge(PNode n, Symb s) { PEdge e = GN(n).edge; - while (e != ENIL) + while (e != ENIL_) if (GE(e).fsym == s) return e; else e = GE(e).next; - return ENIL; + return ENIL_; } WordGraph::PSEdge WordGraph::FindSEdge([[maybe_unused]] PNode n, [[maybe_unused]] Symb s) { @@ -816,7 +827,7 @@ WordGraph::PSEdge WordGraph::FindSEdge([[maybe_unused]] PNode n, [[maybe_unused] else e1++; #endif - return SENIL; + return SENIL_; } WordGraph::PEdge WordGraph::AddEdge(PNode n, Symb s, int len, bool solid, PNode m) { @@ -827,16 +838,17 @@ WordGraph::PEdge WordGraph::AddEdge(PNode n, Symb s, int len, bool solid, PNode e.SetLen(len); e.SetSolid(solid); - if (!insatend) { + if (!insatend_) { // add the edge at the beginning of the list e.next = GN(n).edge; return (GN(n).edge = pe); } else { // add the edge at the end - e.next = ENIL; + e.next = ENIL_; PEdge last = GN(n).edge; - if (last == ENIL) return (GN(n).edge = pe); - while (GE(last).next != ENIL) last = GE(last).next; + if (last == ENIL_) + return (GN(n).edge = pe); + while (GE(last).next != ENIL_) last = GE(last).next; return (GE(last).next = pe); } } @@ -864,18 +876,18 @@ WordGraph::PEdge WordGraph::AddEdge(PNode n, Symb s, int len, bool solid, PNode Count WordGraph::GetEscCount([[maybe_unused]] PNode n, [[maybe_unused]] int c) { DEBUG_ASSERT(c >= 0); #ifdef EXP_ESC_COUNT - return (int)(param.esc_coef * pow(c, param.esc_exp)) + param.esc_count; + return (int)(param_.esc_coef * pow(c, param_.esc_exp)) + param_.esc_count; #else - return param.esc_count; + return param_.esc_count; #endif } Count WordGraph::GetShEscCount([[maybe_unused]] PNode n, [[maybe_unused]] int c) { DEBUG_ASSERT(c >= 0); #ifdef EXP_ESC_COUNT - return 
(int)(param.esc_coef * pow(c, param.esc_exp)) + param.esc_count; + return (int)(param_.esc_coef * pow(c, param_.esc_exp)) + param_.esc_count; #else - return param.esc_count; + return param_.esc_count; #endif } diff --git a/storage/tianmu/compress/word_graph.h b/storage/tianmu/compress/word_graph.h index 2b53598ae..ca4b24a0e 100644 --- a/storage/tianmu/compress/word_graph.h +++ b/storage/tianmu/compress/word_graph.h @@ -39,18 +39,18 @@ class WordGraph : public PPMModel { int stop; // no. of occurences of representative of this node as a suffix // (so terminating in this node) PNode suf; - PEdge edge; // pointer to the first edge leaving this node (or ENIL if no + PEdge edge; // pointer to the first edge leaving this node (or ENIL_ if no // edge leaves the node) int count; // 'stop' is included in 'count' - Count total; // total count for distribution of outgoing edges + ESC + Count total; // total count for distribution of outgoing edges_ + ESC - // bool istab; // "edge" points to an array of 256 edges + // bool istab; // "edge" points to an array of 256 edges_ #ifdef SHADOW_EDGES - Symb nshadow; // no. of shadow edges + Symb nshadow; // no. 
of shadow edges_ PSEdge sedge; // first shadow edge - Count stotal; // total count for distribution of shadow edges + ESC + Count stotal; // total count for distribution of shadow edges_ + ESC #endif Node() { std::memset(this, 0, sizeof *this); } @@ -88,29 +88,29 @@ class WordGraph : public PPMModel { }; #pragma pack(pop) - static const PNode ROOT1 = 1; - static const PNode NIL = 0; - static const PEdge ENIL = 0; - static const PSEdge SENIL = 0; + static const PNode ROOT1_ = 1; + static const PNode NIL_ = 0; + static const PEdge ENIL_ = 0; + static const PSEdge SENIL_ = 0; - std::vector nodes; - std::vector edges; - std::vector finals; // list of final nodes - std::vector sedges; + std::vector nodes_; + std::vector edges_; + std::vector finals_; // list of final nodes_ + std::vector sedges_; - // original string - for reference; this is a pointer to _original_ data, not + // original string - for reference; this is a pointer to _original_ data_, not // a copy!!! - const Symb *data; - int dlen; // length of 'data', INcluding the possible terminating symbol - // ('\0') + const Symb *data_; + int dlen_; // length of 'data_', INcluding the possible terminating symbol + // ('\0') - PPMParam param; + PPMParam param_; //------------------------------------------------------------------------- - Node &GN(PNode n) { return nodes[n]; } - Edge &GE(PEdge e) { return edges[e]; } - SEdge &GSE(PSEdge e) { return sedges[e]; } + Node &GN(PNode n) { return nodes_[n]; } + Edge &GE(PEdge e) { return edges_[e]; } + SEdge &GSE(PSEdge e) { return sedges_[e]; } PEdge NxtEdge(PEdge e) { return GE(e).next; } PEdge FindEdge(PNode n, Symb s); PEdge AddEdge(PNode n, Symb s, int len, bool solid, PNode m); @@ -118,17 +118,17 @@ class WordGraph : public PPMModel { PSEdge FindSEdge(PNode n, Symb s); PNode NewNode() { - int s = (int)nodes.size(); - nodes.resize(s + 1); + int s = (int)nodes_.size(); + nodes_.resize(s + 1); return s; } PEdge NewEdge() { - int s = (int)edges.size(); - edges.resize(s + 
1); + int s = (int)edges_.size(); + edges_.resize(s + 1); return s; } - // PSEdge NewSEdge() { int s = (int)sedges.size(); - // sedges.resize(s+1); return s; + // PSEdge NewSEdge() { int s = (int)sedges_.size(); + // sedges_.resize(s+1); return s; // } PNode NewFinal(int endpos); @@ -144,7 +144,7 @@ class WordGraph : public PPMModel { // (1) { n = x, proj = 0, edge = undefined } // (2) { n = parent_of_x, proj = len_of_edge, edge = edge_to_x } // The latter (not fully canonized) is required during duplication, to find - // non-solid edges for redirection. + // non-solid edges_ for redirection. struct Point { PNode n; uint proj; // number of symbols passed from 'n' along 'edge' @@ -173,8 +173,8 @@ class WordGraph : public PPMModel { }; #ifdef SHADOW_EDGES - int shlen[NSymb]; // used in SetShadow() to store lenghts of edges of the - // longer-suffix node + int shlen_[N_Symb_]; // used in SetShadow() to store lenghts of edges_ of the + // longer-suffix node #endif void PropagateStop(); @@ -182,15 +182,15 @@ class WordGraph : public PPMModel { // void SortEdges(); void SetSums(); void SetShadow(PNode pn); - int Shift(int c); // c - cumulative count of outgoing edges (without ESC and + int Shift(int c); // c - cumulative count of outgoing edges_ (without ESC and // 'stop' of the node) Count GetEscCount(PNode n, int c = 0); Count GetShEscCount(PNode n, int c = 0); - // Count GetEscCount(PNode n) { DEBUG_ASSERT(n != NIL); return - // param.esc_count; } - // Count GetShEscCount(PNode n) { DEBUG_ASSERT(n != NIL); return - // param.esc_count; } + // Count GetEscCount(PNode n) { DEBUG_ASSERT(n != NIL_); return + // param_.esc_count; } + // Count GetShEscCount(PNode n) { DEBUG_ASSERT(n != NIL_); return + // param_.esc_count; } //------------------------------------------------------------------------- // PPM compression/decompression @@ -202,47 +202,47 @@ class WordGraph : public PPMModel { #endif // if the last transition was forward or the last node doesn't have shadow - // 
edges, prev == NIL; otherwise it's the previous node + // edges_, prev == NIL_; otherwise it's the previous node struct State { bool lastesc; // the last transition was ESC PNode prev, n; }; - State state; - // PNode state; + State state_; + // PNode state_; - // when "escape", e := ENIL + // when "escape", e := ENIL_ void FindEdge(PEdge &e, PSEdge &se, Symb *str, int len); // compression void FindEdge(PEdge &e, PSEdge &se, Count c); // decompression CprsErr GetLabel(PEdge e, Symb *lbl, int &len); // 'len' - max size of lbl; upon exit: length of lbl void GetRange(PEdge e, PSEdge se, - Range &r); // 'e' must be an edge of the 'state'; 'se' is - // shadow of 'e' or SENIL + Range &r); // 'e' must be an edge of the 'state_'; 'se' is + // shadow of 'e' or SENIL_ #ifdef SHADOW_EDGES - Count GetTotal_() { return state.prev == NIL ? GN(state.n).total : GN(state.prev).stotal; } + Count GetTotal_() { return state_.prev == NIL_ ? GN(state_.n).total : GN(state_.prev).stotal; } #else - Count GetTotal_() { return GN(state.n).total; } + Count GetTotal_() { return GN(state_.n).total; } #endif void Move(PEdge e); - void MakeLog(PNode stt, PEdge e); // print into 'logfile' information about - // current state and transition + void MakeLog(PNode stt, PEdge e); // print into 'log_file_' information about + // current state_ and transition //----------------------------------------------------------------------------- public: // CAUTION: the 'data_' array is NOT physically copied, only its pointer. - // So the data must not change outside this class during lifetime of this + // So the data_ must not change outside this class during lifetime of this // object. WordGraph(const Symb *data_, int dlen_ = -1, bool insatend_ = true); virtual ~WordGraph() { Clear(); } - bool insatend; // insert new child at the end of the children list? + bool insatend_; // insert new child at the end of the children list? 
//------------------------------------------------------------------------- // information and statistics - int GetNNodes() override { return (int)nodes.size(); } + int GetNNodes() override { return (int)nodes_.size(); } void PrintStat(FILE *f) override; int GetMemUsage() override; // real number of bytes used, without wasted space in 'vector' int GetMemAlloc() override; // total number of bytes used diff --git a/storage/tianmu/core/aggregator_advanced.cpp b/storage/tianmu/core/aggregator_advanced.cpp index 61e653e12..69cba9def 100644 --- a/storage/tianmu/core/aggregator_advanced.cpp +++ b/storage/tianmu/core/aggregator_advanced.cpp @@ -178,8 +178,8 @@ void AggregatorGroupConcat::PutAggregatedValue(unsigned char *buf, const types:: auto it = lenmap.find(buf); if (it == lenmap.end()) { - auto copylen = (v.len > gconcat_maxlen) ? gconcat_maxlen : v.len; - std::memcpy(buf, v.val, copylen); + auto copylen = (v.len_ > gconcat_maxlen) ? gconcat_maxlen : v.len_; + std::memcpy(buf, v.val_, copylen); lenmap.emplace(buf, copylen); } else { auto pos = it->second; @@ -188,8 +188,8 @@ void AggregatorGroupConcat::PutAggregatedValue(unsigned char *buf, const types:: if (pos < gconcat_maxlen) { std::string src = si.separator + v.ToString(); // combine the delimeter and value - auto copylen = (pos + v.len + si.separator.length()) >= gconcat_maxlen ? (gconcat_maxlen - pos) - : (v.len + si.separator.length()); + auto copylen = (pos + v.len_ + si.separator.length()) >= gconcat_maxlen ? 
(gconcat_maxlen - pos) + : (v.len_ + si.separator.length()); std::memcpy(buf + pos, src.c_str(), copylen); // append the separator it->second = it->second + copylen; // update the length of the buffer } else { diff --git a/storage/tianmu/core/aggregator_basic.cpp b/storage/tianmu/core/aggregator_basic.cpp index b1f35a64a..edb45a7fc 100644 --- a/storage/tianmu/core/aggregator_basic.cpp +++ b/storage/tianmu/core/aggregator_basic.cpp @@ -280,13 +280,13 @@ void AggregatorMinD::Merge(unsigned char *buf, unsigned char *src_buf) { } void AggregatorMinT::PutAggregatedValue(unsigned char *buf, const types::BString &v, [[maybe_unused]] int64_t factor) { - DEBUG_ASSERT((uint)val_len >= v.len); + DEBUG_ASSERT((uint)val_len >= v.len_); if (*((unsigned short *)buf) == 0 && buf[2] == 0) { // still null stats_updated = false; std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } else { @@ -294,9 +294,9 @@ void AggregatorMinT::PutAggregatedValue(unsigned char *buf, const types::BString if (m > v) { stats_updated = false; std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } @@ -334,13 +334,13 @@ AggregatorMinT_UTF::AggregatorMinT_UTF(int max_len, DTCollation coll) : Aggregat void AggregatorMinT_UTF::PutAggregatedValue(unsigned char *buf, const types::BString &v, [[maybe_unused]] int64_t factor) { - DEBUG_ASSERT((uint)val_len >= v.len); + DEBUG_ASSERT((uint)val_len >= v.len_); if (*((unsigned short *)buf) == 0 && buf[2] == 0) { // still null stats_updated = false; std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 
0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } else { @@ -348,9 +348,9 @@ void AggregatorMinT_UTF::PutAggregatedValue(unsigned char *buf, const types::BSt if (CollationStrCmp(collation, m, v) > 0) { stats_updated = false; std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } @@ -426,21 +426,21 @@ AggregatorMaxT_UTF::AggregatorMaxT_UTF(int max_len, DTCollation coll) : Aggregat void AggregatorMaxT::PutAggregatedValue(unsigned char *buf, const types::BString &v, [[maybe_unused]] int64_t factor) { stats_updated = false; - DEBUG_ASSERT((uint)val_len >= v.len); + DEBUG_ASSERT((uint)val_len >= v.len_); if (*((unsigned short *)buf) == 0 && buf[2] == 0) { // still null std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } else { types::BString m((char *)buf + 2, *((unsigned short *)buf)); if (m < v) { std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } @@ -477,21 +477,21 @@ types::BString AggregatorMaxT::GetValueT(unsigned char *buf) { void AggregatorMaxT_UTF::PutAggregatedValue(unsigned char *buf, const types::BString &v, [[maybe_unused]] int64_t factor) { stats_updated = false; - DEBUG_ASSERT((uint)val_len >= v.len); + DEBUG_ASSERT((uint)val_len >= v.len_); if (*((unsigned 
short *)buf) == 0 && buf[2] == 0) { // still null std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } else { types::BString m((char *)buf + 2, *((unsigned short *)buf)); if (CollationStrCmp(collation, m, v) < 0) { std::memset(buf + 2, 0, val_len); - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) } @@ -519,12 +519,12 @@ int64_t AggregatorList32::GetValue64(unsigned char *buf) { } void AggregatorListT::PutAggregatedValue(unsigned char *buf, const types::BString &v, [[maybe_unused]] int64_t factor) { - DEBUG_ASSERT((uint)val_len >= v.len); + DEBUG_ASSERT((uint)val_len >= v.len_); if (*((unsigned short *)buf) == 0 && buf[2] == 0) { // still null stats_updated = false; - *((unsigned short *)buf) = v.len; - if (v.len > 0) - std::memcpy(buf + 2, v.val, v.len); + *((unsigned short *)buf) = v.len_; + if (v.len_ > 0) + std::memcpy(buf + 2, v.val_, v.len_); else buf[2] = 1; // empty string indicator (non-null) value_set = true; diff --git a/storage/tianmu/core/cached_buffer.cpp b/storage/tianmu/core/cached_buffer.cpp index 25fe5cd95..b353fd1e6 100644 --- a/storage/tianmu/core/cached_buffer.cpp +++ b/storage/tianmu/core/cached_buffer.cpp @@ -87,15 +87,15 @@ void CachedBuffer::Set(uint64_t idx, const T &value) { } void CachedBuffer::Set(uint64_t idx, const types::BString &value) { - DEBUG_ASSERT(value.len <= elem_size); + DEBUG_ASSERT(value.len_ <= elem_size); if (idx / page_size != loaded_page) LoadPage((uint)(idx / page_size)); uint pos = (uint)(idx % page_size) * (elem_size + 4); uint *size = (uint *)&buf[pos]; if (value.IsNull()) *size = common::NULL_VALUE_U; else { 
- *size = value.len; - std::memcpy(&buf[pos + 4], value.val, value.len); + *size = value.len_; + std::memcpy(&buf[pos + 4], value.val_, value.len_); } page_changed = true; } diff --git a/storage/tianmu/core/column_bin_encoder.cpp b/storage/tianmu/core/column_bin_encoder.cpp index 67fa48e69..1077fc50b 100644 --- a/storage/tianmu/core/column_bin_encoder.cpp +++ b/storage/tianmu/core/column_bin_encoder.cpp @@ -833,9 +833,10 @@ void ColumnBinEncoder::EncoderText::Encode(uchar *buf, uchar *buf_sec, vcolumn:: if (s < mins) mins.PersistentCopy(s); } } - ASSERT(s.len <= (uint)size, "Size of buffer too small"); - if (s.len > 0) std::memcpy(buf, s.GetDataBytesPointer(), s.len); - uint32_t length = s.len + 1; + ASSERT(s.len_ <= (uint)size, "Size of buffer too small"); + if (s.len_ > 0) + std::memcpy(buf, s.GetDataBytesPointer(), s.len_); + uint32_t length = s.len_ + 1; std::memcpy(buf + size - sizeof(uint32_t), &length, sizeof(uint32_t)); if (descending) Negate(buf, size); } @@ -857,9 +858,10 @@ bool ColumnBinEncoder::EncoderText::EncodeString(uchar *buf, uchar *buf_sec, typ if (s < mins) mins.PersistentCopy(s); } } - ASSERT(s.len <= (uint)size, "Size of buffer too small"); - if (s.len > 0) std::memcpy(buf, s.GetDataBytesPointer(), s.len); - uint32_t length = s.len + 1; + ASSERT(s.len_ <= (uint)size, "Size of buffer too small"); + if (s.len_ > 0) + std::memcpy(buf, s.GetDataBytesPointer(), s.len_); + uint32_t length = s.len_ + 1; std::memcpy(buf + size - sizeof(uint32_t), &length, sizeof(uint32_t)); if (descending) Negate(buf, size); return true; @@ -887,10 +889,11 @@ types::BString ColumnBinEncoder::EncoderText::GetValueT(uchar *buf, uchar *buf_s } bool ColumnBinEncoder::EncoderText::ImpossibleStringValues(types::BString &pack_min, types::BString &pack_max) { - int lenmin = std::min(pack_min.len, maxs.len); - int lenmax = std::min(pack_max.len, mins.len); + int lenmin = std::min(pack_min.len_, maxs.len_); + int lenmax = std::min(pack_max.len_, mins.len_); - if 
(std::strncmp(pack_min.val, maxs.val, lenmin) > 0 || std::strncmp(pack_max.val, mins.val, lenmax) < 0) return true; + if (std::strncmp(pack_min.val_, maxs.val_, lenmin) > 0 || std::strncmp(pack_max.val_, mins.val_, lenmax) < 0) + return true; return false; } @@ -938,15 +941,16 @@ void ColumnBinEncoder::EncoderText_UTF::Encode(uchar *buf, uchar *buf_sec, vcolu if (CollationStrCmp(collation, s, mins) < 0) mins.PersistentCopy(s); } } - common::strnxfrm(collation, buf, size - sizeof(uint32_t), (uchar *)s.GetDataBytesPointer(), s.len); + common::strnxfrm(collation, buf, size - sizeof(uint32_t), (uchar *)s.GetDataBytesPointer(), s.len_); // int coded_len = types::CollationBufLen(collation, s.len); - uint32_t length = s.len + 1; + uint32_t length = s.len_ + 1; std::memcpy(buf + size - sizeof(uint32_t), &length, sizeof(uint32_t)); if (descending) Negate(buf, size); if (size_sec > 0) { std::memset(buf_sec, 0, size_sec); std::memcpy(buf_sec + size_sec - sizeof(uint32_t), &length, sizeof(uint32_t)); - if (s.len > 0) std::memcpy(buf_sec, s.GetDataBytesPointer(), s.len); + if (s.len_ > 0) + std::memcpy(buf_sec, s.GetDataBytesPointer(), s.len_); } } @@ -967,14 +971,15 @@ bool ColumnBinEncoder::EncoderText_UTF::EncodeString(uchar *buf, uchar *buf_sec, if (CollationStrCmp(collation, s, mins) < 0) mins = s; } } - common::strnxfrm(collation, buf, size - sizeof(uint32_t), (uchar *)s.GetDataBytesPointer(), s.len); - uint32_t length = s.len + 1; + common::strnxfrm(collation, buf, size - sizeof(uint32_t), (uchar *)s.GetDataBytesPointer(), s.len_); + uint32_t length = s.len_ + 1; std::memcpy(buf + size - sizeof(uint32_t), &length, sizeof(uint32_t)); if (descending) Negate(buf, size); if (size_sec > 0) { std::memset(buf_sec, 0, size_sec); std::memcpy(buf_sec + size_sec - sizeof(uint32_t), &length, sizeof(uint32_t)); - if (s.len > 0) std::memcpy(buf_sec, s.GetDataBytesPointer(), s.len); + if (s.len_ > 0) + std::memcpy(buf_sec, s.GetDataBytesPointer(), s.len_); } return true; } @@ -1006,8 
+1011,8 @@ types::BString ColumnBinEncoder::EncoderText_UTF::GetValueT(uchar *buf, uchar *b bool ColumnBinEncoder::EncoderText_UTF::ImpossibleStringValues(types::BString &pack_min, types::BString &pack_max) { unsigned char min[8] = {}; unsigned char max[8] = {}; - std::memcpy(min, pack_min.val, pack_min.len); - std::memcpy(max, pack_max.val, pack_max.len); + std::memcpy(min, pack_min.val_, pack_min.len_); + std::memcpy(max, pack_max.val_, pack_max.len_); if (!maxs.GreaterEqThanMinUTF(min, collation) || !mins.LessEqThanMaxUTF(max, collation)) return true; return false; } @@ -1248,9 +1253,9 @@ void ColumnBinEncoder::EncoderTextMD5::Encode(uchar *buf, uchar *buf_sec, vcolum if (s < mins) mins.PersistentCopy(s); } } - if (s.len > 0) { - HashMD5((unsigned char *)s.GetDataBytesPointer(), s.len, buf); - *((uint *)buf) ^= s.len; + if (s.len_ > 0) { + HashMD5((unsigned char *)s.GetDataBytesPointer(), s.len_, buf); + *((uint *)buf) ^= s.len_; } else std::memcpy(buf, empty_buf, size); } @@ -1271,9 +1276,9 @@ bool ColumnBinEncoder::EncoderTextMD5::EncodeString(uchar *buf, uchar *buf_sec, if (s < mins) mins.PersistentCopy(s); } } - if (s.len > 0) { - HashMD5((unsigned char *)s.GetDataBytesPointer(), s.len, buf); - *((uint *)buf) ^= s.len; + if (s.len_ > 0) { + HashMD5((unsigned char *)s.GetDataBytesPointer(), s.len_, buf); + *((uint *)buf) ^= s.len_; } else std::memcpy(buf, empty_buf, size); return true; @@ -1288,10 +1293,11 @@ bool ColumnBinEncoder::EncoderTextMD5::IsNull(uchar *buf, [[maybe_unused]] uchar } bool ColumnBinEncoder::EncoderTextMD5::ImpossibleStringValues(types::BString &pack_min, types::BString &pack_max) { - int lenmin = std::min(pack_min.len, maxs.len); - int lenmax = std::min(pack_max.len, mins.len); + int lenmin = std::min(pack_min.len_, maxs.len_); + int lenmax = std::min(pack_max.len_, mins.len_); - if (std::strncmp(pack_min.val, maxs.val, lenmin) > 0 || std::strncmp(pack_max.val, mins.val, lenmax) < 0) return true; + if (std::strncmp(pack_min.val_, 
maxs.val_, lenmin) > 0 || std::strncmp(pack_max.val_, mins.val_, lenmax) < 0) + return true; return false; } diff --git a/storage/tianmu/core/condition_encoder.cpp b/storage/tianmu/core/condition_encoder.cpp index 6b7d46b96..7c59f7bfd 100644 --- a/storage/tianmu/core/condition_encoder.cpp +++ b/storage/tianmu/core/condition_encoder.cpp @@ -374,7 +374,7 @@ void ConditionEncoder::TransformLIKEsPattern() { desc->val1.vc->GetValueString(pattern, mit); uint min_len = 0; bool esc = false; - for (uint i = 0; i < pattern.len; i++) { + for (uint i = 0; i < pattern.len_; i++) { if (pattern[i] != '%') min_len++; else @@ -422,7 +422,7 @@ void ConditionEncoder::TransformLIKEsIntoINsOnLookup() { int res; if (types::RequiresUTFConversions(desc->GetCollation())) { types::BString s = attr->GetRealString(i); - res = !common::wildcmp(desc->GetCollation(), s.val, s.val + s.len, pattern.val, pattern.val + pattern.len, + res = !common::wildcmp(desc->GetCollation(), s.val_, s.val_ + s.len_, pattern.val_, pattern.val_ + pattern.len_, desc->like_esc, '_', '%'); } else res = attr->GetRealString(i).Like(pattern, desc->like_esc); diff --git a/storage/tianmu/core/descriptor.cpp b/storage/tianmu/core/descriptor.cpp index fd3038130..3afc0ccc3 100644 --- a/storage/tianmu/core/descriptor.cpp +++ b/storage/tianmu/core/descriptor.cpp @@ -980,8 +980,8 @@ bool Descriptor::CheckCondition_UTF(const MIIterator &mit) { types::BString v, pattern; attr.vc->GetNotNullValueString(v, mit); val1.vc->GetNotNullValueString(pattern, mit); - int x = - common::wildcmp(collation, v.val, v.val + v.len, pattern.val, pattern.val + pattern.len, like_esc, '_', '%'); + int x = common::wildcmp(collation, v.val_, v.val_ + v.len_, pattern.val_, pattern.val_ + pattern.len_, like_esc, + '_', '%'); result = (x == 0 ? 
true : false); if (op == common::Operator::O_LIKE) return result; diff --git a/storage/tianmu/core/engine_convert.cpp b/storage/tianmu/core/engine_convert.cpp index bbc73accb..2dca16017 100644 --- a/storage/tianmu/core/engine_convert.cpp +++ b/storage/tianmu/core/engine_convert.cpp @@ -55,10 +55,10 @@ bool Engine::ConvertToField(Field *field, types::RCDataType &rcitem, std::vector DEBUG_ASSERT(dynamic_cast(&rcitem)); Field_blob *blob = (Field_blob *)field; if (blob_buf == NULL) { - blob->set_ptr(((types::BString &)rcitem).len, (uchar *)((types::BString &)rcitem).val); + blob->set_ptr(((types::BString &)rcitem).len_, (uchar *)((types::BString &)rcitem).val_); blob->copy(); } else { - blob->store(((types::BString &)rcitem).val, ((types::BString &)rcitem).len, &my_charset_bin); + blob->store(((types::BString &)rcitem).val_, ((types::BString &)rcitem).len_, &my_charset_bin); uchar *src, *tgt; uint packlength = blob->pack_length_no_ptr(); @@ -143,10 +143,10 @@ bool Engine::ConvertToField(Field *field, types::RCDataType &rcitem, std::vector case MYSQL_TYPE_BLOB: { Field_blob *blob = (Field_blob *)field; if (blob_buf == NULL) { - blob->set_ptr(((types::BString &)rcitem).len, (uchar *)((types::BString &)rcitem).val); + blob->set_ptr(((types::BString &)rcitem).len_, (uchar *)((types::BString &)rcitem).val_); blob->copy(); } else { - blob->store(((types::BString &)rcitem).val, ((types::BString &)rcitem).len, &my_charset_bin); + blob->store(((types::BString &)rcitem).val_, ((types::BString &)rcitem).len_, &my_charset_bin); uchar *src, *tgt; uint packlength = blob->pack_length_no_ptr(); @@ -429,23 +429,23 @@ int Engine::Convert(int &is_null, String *value, types::RCDataType &rcitem, enum is_null = 0; if (f_type == MYSQL_TYPE_VARCHAR || f_type == MYSQL_TYPE_VAR_STRING) { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } else if (f_type == MYSQL_TYPE_STRING) { types::BString str = 
rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } else if (f_type == MYSQL_TYPE_NEWDATE || f_type == MYSQL_TYPE_DATE) { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } else if (f_type == MYSQL_TYPE_TIME) { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } else if (f_type == MYSQL_TYPE_DATETIME) { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } else if (f_type == MYSQL_TYPE_TIMESTAMP) { if (types::RCDateTime *rcdt = dynamic_cast(&rcitem)) { @@ -463,12 +463,12 @@ int Engine::Convert(int &is_null, String *value, types::RCDataType &rcitem, enum } } else { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); } value->copy(); } else if (f_type == MYSQL_TYPE_BLOB || f_type == MYSQL_TYPE_MEDIUM_BLOB) { types::BString str = rcitem.ToBString(); - value->set_ascii(str.val, str.len); + value->set_ascii(str.val_, str.len_); value->copy(); } return 1; diff --git a/storage/tianmu/core/just_a_table.cpp b/storage/tianmu/core/just_a_table.cpp index 2f2435ff5..19fb46519 100644 --- a/storage/tianmu/core/just_a_table.cpp +++ b/storage/tianmu/core/just_a_table.cpp @@ -35,7 +35,7 @@ ValueOrNull JustATable::GetComplexValue(const int64_t obj, const int attr) { MYSQL_TIME myt; MYSQL_TIME_STATUS not_used; // convert UTC timestamp given in string into TIME structure - str_to_datetime(s.GetDataBytesPointer(), s.len, &myt, TIME_DATETIME_ONLY, ¬_used); + str_to_datetime(s.GetDataBytesPointer(), s.len_, &myt, TIME_DATETIME_ONLY, ¬_used); return ValueOrNull(types::RCDateTime(myt, common::CT::TIMESTAMP).GetInt64()); } if (ct.IsFixed() || ct.IsFloat() || ct.IsDateTime()) return ValueOrNull(GetTable64(obj, attr)); 
diff --git a/storage/tianmu/core/rc_attr.cpp b/storage/tianmu/core/rc_attr.cpp index 8c82b8b8d..06ea48696 100644 --- a/storage/tianmu/core/rc_attr.cpp +++ b/storage/tianmu/core/rc_attr.cpp @@ -457,8 +457,8 @@ types::RCValueObject RCAttr::GetValue(int64_t obj, bool lookup_to_num) { else if (ATI::IsBinType(a_type)) { auto tmp_size = GetLength(obj); types::BString rcbs(NULL, tmp_size, true); - GetValueBin(obj, tmp_size, rcbs.val); - rcbs.null = false; + GetValueBin(obj, tmp_size, rcbs.val_); + rcbs.null_ = false; ret = rcbs; } else if (ATI::IsIntegerType(a_type)) ret = types::RCNum(GetNotNullValueInt64(obj), -1, false, a_type); @@ -468,7 +468,7 @@ types::RCValueObject RCAttr::GetValue(int64_t obj, bool lookup_to_num) { MYSQL_TIME myt; MYSQL_TIME_STATUS not_used; // convert UTC timestamp given in string into TIME structure - str_to_datetime(s.GetDataBytesPointer(), s.len, &myt, TIME_DATETIME_ONLY, ¬_used); + str_to_datetime(s.GetDataBytesPointer(), s.len_, &myt, TIME_DATETIME_ONLY, ¬_used); return types::RCDateTime(myt, common::CT::TIMESTAMP); } else if (ATI::IsDateTimeType(a_type)) ret = types::RCDateTime(this->GetNotNullValueInt64(obj), a_type); @@ -491,8 +491,8 @@ types::RCDataType &RCAttr::GetValueData(size_t obj, types::RCDataType &value, bo else if (ATI::IsBinType(a_type)) { auto tmp_size = GetLength(obj); ((types::BString &)value) = types::BString(NULL, tmp_size, true); - GetValueBin(obj, tmp_size, ((types::BString &)value).val); - value.null = false; + GetValueBin(obj, tmp_size, ((types::BString &)value).val_); + value.null_ = false; } else if (ATI::IsIntegerType(a_type)) ((types::RCNum &)value).Assign(GetNotNullValueInt64(obj), -1, false, a_type); else if (ATI::IsDateTimeType(a_type)) { @@ -622,7 +622,7 @@ int RCAttr::EncodeValue_T(const types::BString &rcbs, bool new_val, common::Erro if (ATI::IsStringType(TypeName())) { DEBUG_ASSERT(GetPackType() == common::PackType::INT); LoadPackInfo(); - int vs = m_dict->GetEncodedValue(rcbs.val, rcbs.len); + int vs = 
m_dict->GetEncodedValue(rcbs.val_, rcbs.len_); if (vs < 0) { if (!new_val) { return common::NULL_VALUE_32; @@ -638,11 +638,11 @@ int RCAttr::EncodeValue_T(const types::BString &rcbs, bool new_val, common::Erro hdr.dict_ver++; ha_rcengine_->cache.PutObject(FTreeCoordinate(m_tid, m_cid, hdr.dict_ver), m_dict); } - vs = m_dict->Add(rcbs.val, rcbs.len); + vs = m_dict->Add(rcbs.val_, rcbs.len_); } return vs; } - char const *val = rcbs.val; + char const *val = rcbs.val_; if (val == 0) val = ZERO_LENGTH_STRING; if (ATI::IsDateTimeType(TypeName()) || TypeName() == common::CT::BIGINT) { ASSERT(0, "Wrong data type!"); @@ -1308,7 +1308,7 @@ void RCAttr::UpdateIfIndex(uint64_t row, uint64_t col, const Value &v) { auto &vnew = v.GetString(); auto vold = GetValueString(row); std::string_view nkey(vnew.data(), vnew.length()); - std::string_view okey(vold.val, vold.size()); + std::string_view okey(vold.val_, vold.size()); common::ErrorCode returnCode = tab->UpdateIndex(current_txn_, nkey, okey, row); if (returnCode == common::ErrorCode::DUPP_KEY || returnCode == common::ErrorCode::FAILED) { TIANMU_LOG(LogCtl_Level::DEBUG, "Duplicate entry: %s for primary key", vnew.data()); @@ -1337,7 +1337,7 @@ void RCAttr::DeleteByPrimaryKey(uint64_t row, uint64_t col) { if (GetPackType() == common::PackType::STR) { auto currentValue = GetValueString(row); - std::string_view currentRowKey(currentValue.val, currentValue.size()); + std::string_view currentRowKey(currentValue.val_, currentValue.size()); common::ErrorCode returnCode = tab->DeleteIndex(current_txn_, currentRowKey, row); if (returnCode == common::ErrorCode::FAILED) { TIANMU_LOG(LogCtl_Level::DEBUG, "Delete: %s for primary key", currentValue.GetDataBytesPointer()); diff --git a/storage/tianmu/core/rcattr_exeq_rs.cpp b/storage/tianmu/core/rcattr_exeq_rs.cpp index e4a0217f7..788ecfaf1 100644 --- a/storage/tianmu/core/rcattr_exeq_rs.cpp +++ b/storage/tianmu/core/rcattr_exeq_rs.cpp @@ -95,15 +95,17 @@ common::RSValue RCAttr::RoughCheck(int 
pack, Descriptor &d, bool additional_null uint pack_prefix; if (types::RequiresUTFConversions(d.GetCollation())) { my_match_t mm; - if (d.GetCollation().collation->coll->instr(d.GetCollation().collation, pat.val, pat.len, "%", 1, &mm, 1) == 2) + if (d.GetCollation().collation->coll->instr(d.GetCollation().collation, pat.val_, pat.len_, "%", 1, &mm, 1) == + 2) pattern_prefix = pattern_fixed_prefix = mm.end; - if (d.GetCollation().collation->coll->instr(d.GetCollation().collation, pat.val, pat.len, "_", 1, &mm, 1) == 2) + if (d.GetCollation().collation->coll->instr(d.GetCollation().collation, pat.val_, pat.len_, "_", 1, &mm, 1) == + 2) if (mm.end < pattern_fixed_prefix) pattern_fixed_prefix = mm.end; if ((pattern_fixed_prefix > 0) && - types::BString(pat.val, pattern_fixed_prefix).LessEqThanMaxUTF(dpn.max_s, Type().GetCollation()) == false) + types::BString(pat.val_, pattern_fixed_prefix).LessEqThanMaxUTF(dpn.max_s, Type().GetCollation()) == false) res = common::RSValue::RS_NONE; if (pattern_fixed_prefix > GetActualSize(pack)) @@ -112,8 +114,8 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null if (res == common::RSValue::RS_SOME && pack_prefix > 0 && pattern_fixed_prefix <= pack_prefix // special case: "xyz%" and the // pack prefix is at least 3 - && pattern_fixed_prefix + 1 == pat.len && pat[pattern_fixed_prefix] == '%') { - if (d.GetCollation().collation->coll->strnncoll(d.GetCollation().collation, (const uchar *)pat.val, + && pattern_fixed_prefix + 1 == pat.len_ && pat[pattern_fixed_prefix] == '%') { + if (d.GetCollation().collation->coll->strnncoll(d.GetCollation().collation, (const uchar *)pat.val_, pattern_fixed_prefix, (const uchar *)dpn.min_s, pattern_fixed_prefix, 0) == 0) res = common::RSValue::RS_ALL; @@ -122,11 +124,11 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null } } else { - while (pattern_prefix < pat.len && pat[pattern_prefix] != '%') pattern_prefix++; - while (pattern_fixed_prefix 
< pat.len && pat[pattern_fixed_prefix] != '%' && pat[pattern_fixed_prefix] != '_') + while (pattern_prefix < pat.len_ && pat[pattern_prefix] != '%') pattern_prefix++; + while (pattern_fixed_prefix < pat.len_ && pat[pattern_fixed_prefix] != '%' && pat[pattern_fixed_prefix] != '_') pattern_fixed_prefix++; - if ((pattern_fixed_prefix > 0) && types::BString(pat.val, pattern_fixed_prefix).LessEqThanMax(dpn.max_s) == + if ((pattern_fixed_prefix > 0) && types::BString(pat.val_, pattern_fixed_prefix).LessEqThanMax(dpn.max_s) == false) // val_t==NULL means +/-infty res = common::RSValue::RS_NONE; if (pattern_fixed_prefix > GetActualSize(pack)) @@ -135,25 +137,25 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null if (res == common::RSValue::RS_SOME && pack_prefix > 0 && pattern_fixed_prefix <= pack_prefix // special case: "xyz%" and the // pack prefix is at least 3 - && pattern_fixed_prefix + 1 == pat.len && pat[pattern_fixed_prefix] == '%') { - if (std::memcmp(pat.val, dpn.min_s, pattern_fixed_prefix) == 0) // pattern is equal to the prefix + && pattern_fixed_prefix + 1 == pat.len_ && pat[pattern_fixed_prefix] == '%') { + if (std::memcmp(pat.val_, dpn.min_s, pattern_fixed_prefix) == 0) // pattern is equal to the prefix res = common::RSValue::RS_ALL; else res = common::RSValue::RS_NONE; // prefix and pattern are different } } - if (res == common::RSValue::RS_SOME && std::min(pattern_prefix, pack_prefix) < pat.len && + if (res == common::RSValue::RS_SOME && std::min(pattern_prefix, pack_prefix) < pat.len_ && !types::RequiresUTFConversions(d.GetCollation())) { types::BString pattern_for_cmap; // note that cmap is shifted by a common prefix! 
if (pattern_prefix > pack_prefix) - pattern_for_cmap = types::BString(pat.val + pack_prefix, - pat.len - pack_prefix); // "xyz%abc" -> "z%abc" + pattern_for_cmap = types::BString(pat.val_ + pack_prefix, + pat.len_ - pack_prefix); // "xyz%abc" -> "z%abc" else - pattern_for_cmap = types::BString(pat.val + pattern_prefix, - pat.len - pattern_prefix); // "xyz%abc" -> "%abc" + pattern_for_cmap = types::BString(pat.val_ + pattern_prefix, + pat.len_ - pattern_prefix); // "xyz%abc" -> "%abc" - if (!(pattern_for_cmap.len == 1 && pattern_for_cmap[0] == '%')) { // i.e. "%" => all is matching + if (!(pattern_for_cmap.len_ == 1 && pattern_for_cmap[0] == '%')) { // i.e. "%" => all is matching if (auto sp = GetFilter_CMap()) res = sp->IsLike(pattern_for_cmap, pack, d.like_esc); } else @@ -182,11 +184,11 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null for (vcolumn::MultiValColumn::Iterator it = mvc->begin(mit), end = mvc->end(mit); (it != end) && (res == common::RSValue::RS_NONE); ++it) { types::BString v1 = it->GetString(); - if (pack_prefix <= v1.len) { - if (pack_prefix == 0 || std::memcmp(v1.val, dpn.min_s, pack_prefix) == 0) { - size_t len = v1.len - pack_prefix; - types::BString v(len <= 0 ? "" : v1.val + pack_prefix, (int)len); - if (v1.len == pack_prefix || sp->IsValue(v, v, pack) != common::RSValue::RS_NONE) + if (pack_prefix <= v1.len_) { + if (pack_prefix == 0 || std::memcmp(v1.val_, dpn.min_s, pack_prefix) == 0) { + size_t len = v1.len_ - pack_prefix; + types::BString v(len <= 0 ? 
"" : v1.val_ + pack_prefix, (int)len); + if (v1.len_ == pack_prefix || sp->IsValue(v, v, pack) != common::RSValue::RS_NONE) // suspected, if any value is possible (due to the prefix or // CMAP) res = common::RSValue::RS_SOME; @@ -303,7 +305,7 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null vc2->GetValueString(vmax, mit); if (vmin.IsNull() && vmax.IsNull()) // comparing with null - always false return common::RSValue::RS_NONE; - while (vmin.val && vmax.val && val_prefix < vmin.len && val_prefix < vmax.len && + while (vmin.val_ && vmax.val_ && val_prefix < vmin.len_ && val_prefix < vmax.len_ && vmin[val_prefix] == vmax[val_prefix]) val_prefix++; // Common prefix for values. It is a value length in case // of equality. @@ -311,12 +313,12 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null // TODO UTF8: check PREFIX handling if (val_prefix > GetActualSize(pack)) { // value to be found is longer than texts in the pack res = common::RSValue::RS_NONE; - } else if ((vmax.val && vmax.GreaterEqThanMinUTF(dpn.min_s, Type().GetCollation()) == false) || - (vmin.val && + } else if ((vmax.val_ && vmax.GreaterEqThanMinUTF(dpn.min_s, Type().GetCollation()) == false) || + (vmin.val_ && vmin.LessEqThanMaxUTF(dpn.max_s, Type().GetCollation()) == false)) // val_t==NULL means +/-infty res = common::RSValue::RS_NONE; - else if ((vmin.val == NULL || vmin.GreaterEqThanMinUTF(dpn.min_s, Type().GetCollation()) == false) && - (vmax.val == NULL || + else if ((vmin.val_ == NULL || vmin.GreaterEqThanMinUTF(dpn.min_s, Type().GetCollation()) == false) && + (vmax.val_ == NULL || vmax.LessEqThanMaxUTF(dpn.max_s, Type().GetCollation()) == false)) // val_t==NULL means +/-infty res = common::RSValue::RS_ALL; else if (pack_prefix == GetActualSize(pack) && vmin == vmax) { // exact case for short texts @@ -327,7 +329,7 @@ common::RSValue RCAttr::RoughCheck(int pack, Descriptor &d, bool additional_null res = common::RSValue::RS_NONE; } - 
if (res == common::RSValue::RS_SOME && vmin.len >= pack_prefix && vmax.len >= pack_prefix && + if (res == common::RSValue::RS_SOME && vmin.len_ >= pack_prefix && vmax.len_ >= pack_prefix && !types::RequiresUTFConversions(d.GetCollation())) { vmin += pack_prefix; // redefine - shift by a common prefix vmax += pack_prefix; diff --git a/storage/tianmu/core/rcattr_exqp.cpp b/storage/tianmu/core/rcattr_exqp.cpp index d71e5b0df..07887e5c2 100644 --- a/storage/tianmu/core/rcattr_exqp.cpp +++ b/storage/tianmu/core/rcattr_exqp.cpp @@ -408,7 +408,7 @@ void RCAttr::EvaluatePack_Like(MIUpdatingIterator &mit, int dim, Descriptor &d) types::BString pattern; d.val1.vc->GetValueString(pattern, mit); size_t min_len = 0; // the number of fixed characters - for (uint i = 0; i < pattern.len; i++) { + for (uint i = 0; i < pattern.len_; i++) { if (pattern[i] != '%') min_len++; if (pattern[i] == d.like_esc) { // disable optimization, escape character @@ -482,7 +482,7 @@ void RCAttr::EvaluatePack_Like_UTF(MIUpdatingIterator &mit, int dim, Descriptor types::BString pattern; d.val1.vc->GetValueString(pattern, mit); size_t min_len = 0; // the number of fixed characters - for (uint i = 0; i < pattern.len; i++) + for (uint i = 0; i < pattern.len_; i++) if (pattern[i] != '%') min_len++; std::unordered_set possible_ids; @@ -522,8 +522,8 @@ void RCAttr::EvaluatePack_Like_UTF(MIUpdatingIterator &mit, int dim, Descriptor res = false; else { v.MakePersistent(); - int x = common::wildcmp(d.GetCollation(), v.val, v.val + v.len, pattern.val, pattern.val + pattern.len, '\\', - '_', '%'); + int x = common::wildcmp(d.GetCollation(), v.val_, v.val_ + v.len_, pattern.val_, pattern.val_ + pattern.len_, + '\\', '_', '%'); res = (x == 0 ? 
true : false); } if (d.op == common::Operator::O_NOT_LIKE) @@ -604,8 +604,8 @@ void RCAttr::EvaluatePack_InString_UTF(MIUpdatingIterator &mit, int dim, Descrip types::BString vt(p->GetValueBinary(inpack)); //, true if (arraysize > 0 && arraysize < 10) { for (auto &it : d.val1.cond_value) { - if (coll.collation->coll->strnncoll(coll.collation, (const uchar *)it.val, it.len, (const uchar *)vt.val, - vt.len, 0) == 0) { + if (coll.collation->coll->strnncoll(coll.collation, (const uchar *)it.val_, it.len_, (const uchar *)vt.val_, + vt.len_, 0) == 0) { res = true; break; } diff --git a/storage/tianmu/core/rsi_bloom.cpp b/storage/tianmu/core/rsi_bloom.cpp index 9eeadabe3..6f612573a 100644 --- a/storage/tianmu/core/rsi_bloom.cpp +++ b/storage/tianmu/core/rsi_bloom.cpp @@ -68,7 +68,7 @@ common::RSValue RSIndex_Bloom::IsValue(types::BString min_v, types::BString max_ // this pack no bloom filter data return common::RSValue::RS_SOME; } - Slice key(max_v.val, max_v.size()); + Slice key(max_v.val_, max_v.size()); // get filter data Slice pack_block(bf.data, bf.len); FilterBlockReader reader(bloom_filter_policy.get(), pack_block); diff --git a/storage/tianmu/core/rsi_cmap.cpp b/storage/tianmu/core/rsi_cmap.cpp index 5c46fddd6..0c62bc128 100644 --- a/storage/tianmu/core/rsi_cmap.cpp +++ b/storage/tianmu/core/rsi_cmap.cpp @@ -97,8 +97,10 @@ common::RSValue RSIndex_CMap::IsValue(types::BString min_v, types::BString max_v } else { // TODO: may be further optimized unsigned char f = 0, l = 255; - if (min_v.len > 0) f = (unsigned char)min_v[0]; // min_v.len == 0 usually means -inf - if (max_v.len > 0) l = (unsigned char)max_v[0]; + if (min_v.len_ > 0) + f = (unsigned char)min_v[0]; // min_v.len == 0 usually means -inf + if (max_v.len_ > 0) + l = (unsigned char)max_v[0]; if (f > l || !IsAnySet(pack, f, l, 0)) return common::RSValue::RS_NONE; return common::RSValue::RS_SOME; } @@ -106,9 +108,9 @@ common::RSValue RSIndex_CMap::IsValue(types::BString min_v, types::BString max_v 
common::RSValue RSIndex_CMap::IsLike(types::BString pattern, int pack, char escape_character) { // we can exclude cases: "ala%..." and "a_l_a%..." - char *p = pattern.val; // just for short... + char *p = pattern.val_; // just for short... uint pos = 0; - while (pos < pattern.len && pos < hdr.no_positions) { + while (pos < pattern.len_ && pos < hdr.no_positions) { if (p[pos] == '%' || p[pos] == escape_character) break; if (p[pos] != '_' && !IsSet(pack, p[pos], pos)) return common::RSValue::RS_NONE; pos++; diff --git a/storage/tianmu/core/value_or_null.cpp b/storage/tianmu/core/value_or_null.cpp index e988452ee..2e57ad0aa 100644 --- a/storage/tianmu/core/value_or_null.cpp +++ b/storage/tianmu/core/value_or_null.cpp @@ -28,14 +28,14 @@ void ValueOrNull::SetBString(const types::BString &rcs) { null = false; if (rcs.IsPersistent()) { string_owner = true; - sp = new char[rcs.len + 1]; - std::memcpy(sp, rcs.val, rcs.len); - sp[rcs.len] = 0; + sp = new char[rcs.len_ + 1]; + std::memcpy(sp, rcs.val_, rcs.len_); + sp[rcs.len_] = 0; } else { - sp = rcs.val; + sp = rcs.val_; string_owner = false; } - len = rcs.len; + len = rcs.len_; } } @@ -93,8 +93,8 @@ ValueOrNull::ValueOrNull(types::RCNum const &rcn) : x(rcn.GetValueInt64()), null ValueOrNull::ValueOrNull(types::RCDateTime const &rcdt) : x(rcdt.GetInt64()), null(rcdt.IsNull()) {} ValueOrNull::ValueOrNull(types::BString const &rcs) - : x(common::NULL_VALUE_64), sp(new char[rcs.len + 1]), len(rcs.len), string_owner(true), null(rcs.IsNull()) { - std::memcpy(sp, rcs.val, len); + : x(common::NULL_VALUE_64), sp(new char[rcs.len_ + 1]), len(rcs.len_), string_owner(true), null(rcs.IsNull()) { + std::memcpy(sp, rcs.val_, len); sp[len] = 0; } diff --git a/storage/tianmu/exporter/data_exporter_txt.cpp b/storage/tianmu/exporter/data_exporter_txt.cpp index 21a3c9520..326a51a48 100644 --- a/storage/tianmu/exporter/data_exporter_txt.cpp +++ b/storage/tianmu/exporter/data_exporter_txt.cpp @@ -33,9 +33,9 @@ DEforTxt::DEforTxt(const 
system::IOParameters &iop) void DEforTxt::PutText(const types::BString &str) { WriteStringQualifier(); size_t char_len = - deas[cur_attr].GetCollation().collation->cset->numchars(deas[cur_attr].GetCollation().collation, str.val, - str.val + str.len); // len in chars - WriteString(str, str.len); // len in bytes + deas[cur_attr].GetCollation().collation->cset->numchars(deas[cur_attr].GetCollation().collation, str.val_, + str.val_ + str.len_); // len in chars + WriteString(str, str.len_); // len in bytes if ((deas[cur_attr].Type() == common::CT::STRING) && (char_len < deas[cur_attr].CharLen())) // it can be necessary to change the WritePad implementation to something like: // collation->cset->fill(cs, copy->to_ptr+copy->from_length, @@ -61,7 +61,7 @@ void DEforTxt::PutBin(const types::BString &str) { // len = rcdea[cur_attr].size; if (len > 0) { char *hex = new char[len * 2]; - system::Convert2Hex((const unsigned char *)str.val, len, hex, len * 2, false); + system::Convert2Hex((const unsigned char *)str.val_, len, hex, len * 2, false); WriteString(types::BString(hex, len * 2)); delete[] hex; } diff --git a/storage/tianmu/loader/load_parser.cpp b/storage/tianmu/loader/load_parser.cpp index 99f96a767..76c4b3f4b 100644 --- a/storage/tianmu/loader/load_parser.cpp +++ b/storage/tianmu/loader/load_parser.cpp @@ -155,8 +155,8 @@ bool LoadParser::MakeValue(uint att, ValueCache &buffer) { if (attrs[att]->Type().IsLookup() && !buffer.ExpectedNull()) { types::BString s(ZERO_LENGTH_STRING, 0); buffer.Prepare(sizeof(int64_t)); - s.val = static_cast(buffer.PreparedBuffer()); - s.len = buffer.ExpectedSize(); + s.val_ = static_cast(buffer.PreparedBuffer()); + s.len_ = buffer.ExpectedSize(); *reinterpret_cast(buffer.PreparedBuffer()) = attrs[att]->EncodeValue_T(s, true); buffer.ExpectedSize(sizeof(int64_t)); } diff --git a/storage/tianmu/loader/value_cache.cpp b/storage/tianmu/loader/value_cache.cpp index 780d8f41f..9137ed9c4 100644 --- a/storage/tianmu/loader/value_cache.cpp +++ 
b/storage/tianmu/loader/value_cache.cpp @@ -92,7 +92,8 @@ void ValueCache::CalcStrStats(types::BString &min_s, types::BString &max_s, uint for (size_t i = 0; i < values_.size(); ++i) { if (!nulls_[i]) { types::BString v((Size(i) ? GetDataBytesPointer(i) : ZERO_LENGTH_STRING), Size(i)); - if (v.len > maxlen) maxlen = v.len; + if (v.len_ > maxlen) + maxlen = v.len_; if (min_s.IsNull()) min_s = v; diff --git a/storage/tianmu/system/channel_out.h b/storage/tianmu/system/channel_out.h index 27a06bbaf..7beb49340 100644 --- a/storage/tianmu/system/channel_out.h +++ b/storage/tianmu/system/channel_out.h @@ -49,7 +49,7 @@ class ChannelOut { virtual ChannelOut &operator<<(const std::string &str) = 0; ChannelOut &operator<<(types::BString &rcbs) { - for (ushort i = 0; i < rcbs.len; i++) (*this) << (char)(rcbs[i]); + for (ushort i = 0; i < rcbs.len_; i++) (*this) << (char)(rcbs[i]); return *this; } diff --git a/storage/tianmu/types/bstring.cpp b/storage/tianmu/types/bstring.cpp index 38e8c1948..f4342691f 100644 --- a/storage/tianmu/types/bstring.cpp +++ b/storage/tianmu/types/bstring.cpp @@ -23,54 +23,56 @@ namespace Tianmu { namespace types { -BString::BString() // null string +BString::BString() // null_ string { - null = true; - len = 0; - val = 0; - pos = 0; - persistent = false; + null_ = true; + len_ = 0; + val_ = 0; + pos_ = 0; + persistent_ = false; } -BString::BString(const char *v, size_t length, bool persistent) : persistent(persistent) { - // NOTE: we allow val to be NULL. In this case, no value will be copied (just +BString::BString(const char *v, size_t length, bool persistent) : persistent_(persistent) { + // NOTE: we allow val_ to be NULL. In this case, no value will be copied (just // reserve a place for future use). Only persistent! 
- pos = 0; - null = false; + pos_ = 0; + null_ = false; if (length == 0) { if (v) - len = std::strlen(v); + len_ = std::strlen(v); else - len = 0; + len_ = 0; } else - len = (uint)length; + len_ = (uint)length; if (persistent == false) - val = const_cast(v); + val_ = const_cast(v); else { - val = new char[len]; - if (v) std::memcpy(val, v, len); + val_ = new char[len_]; + if (v) + std::memcpy(val_, v, len_); } } -BString::BString(const BString &rcbs) : ValueBasic(rcbs), pos(rcbs.pos), persistent(rcbs.persistent) { - null = rcbs.null; - if (!null) { - len = rcbs.len; - if (persistent) { - val = new char[len + pos]; - std::memcpy(val, rcbs.val, len + pos); +BString::BString(const BString &rcbs) : ValueBasic(rcbs), pos_(rcbs.pos_), persistent_(rcbs.persistent_) { + null_ = rcbs.null_; + if (!null_) { + len_ = rcbs.len_; + if (persistent_) { + val_ = new char[len_ + pos_]; + std::memcpy(val_, rcbs.val_, len_ + pos_); } else - val = rcbs.val; + val_ = rcbs.val_; } else { - len = 0; - val = 0; - pos = 0; - persistent = false; + len_ = 0; + val_ = 0; + pos_ = 0; + persistent_ = false; } } BString::~BString() { - if (persistent) delete[] val; + if (persistent_) + delete[] val_; } BString &BString::operator=(const RCDataType &rcdt) { @@ -92,67 +94,72 @@ bool BString::Parse(BString &in, BString &out) { common::CT BString::Type() const { return common::CT::STRING; } void BString::PutString(char *&dest, ushort len, bool move_ptr) const { - ASSERT(this->len <= len, "should be 'this->len <= len'"); - if (this->len == 0) + ASSERT(this->len_ <= len, "should be 'this->len_ <= len'"); + if (this->len_ == 0) std::memset(dest, ' ', len); else { - std::memcpy(dest, val, this->len); - std::memset(dest + this->len, ' ', len - this->len); + std::memcpy(dest, val_, this->len_); + std::memset(dest + this->len_, ' ', len - this->len_); } if (move_ptr) dest += len; } void BString::PutVarchar(char *&dest, uchar prefixlen, bool move_ptr) const { - if (prefixlen == 0) PutString(dest, len); - if 
(len == 0) { + if (prefixlen == 0) + PutString(dest, len_); + if (len_ == 0) { std::memset(dest, 0, prefixlen); if (move_ptr) dest += prefixlen; } else { switch (prefixlen) { case 1: - *(uchar *)dest = (uchar)len; + *(uchar *)dest = (uchar)len_; break; case 2: - *(ushort *)dest = (ushort)len; + *(ushort *)dest = (ushort)len_; break; case 4: - *(uint *)dest = (uint)len; + *(uint *)dest = (uint)len_; break; default: TIANMU_ERROR("not implemented"); } - std::memcpy(dest + prefixlen, val, len); - if (move_ptr) dest += prefixlen + len; + std::memcpy(dest + prefixlen, val_, len_); + if (move_ptr) + dest += prefixlen + len_; } } BString &BString::operator=(const BString &rcbs) { if (this == &rcbs) return *this; - null = rcbs.null; - if (null) { - if (persistent) delete[] val; - val = 0; - len = 0; - pos = 0; + null_ = rcbs.null_; + if (null_) { + if (persistent_) + delete[] val_; + val_ = 0; + len_ = 0; + pos_ = 0; } else { - if (rcbs.persistent) { - uint tmp_len = rcbs.len + rcbs.pos; - if (!persistent || tmp_len > len + pos) { - if (persistent) delete[] val; - val = new char[tmp_len]; + if (rcbs.persistent_) { + uint tmp_len = rcbs.len_ + rcbs.pos_; + if (!persistent_ || tmp_len > len_ + pos_) { + if (persistent_) + delete[] val_; + val_ = new char[tmp_len]; } - len = rcbs.len; - pos = rcbs.pos; - std::memcpy(val, rcbs.val, len + pos); + len_ = rcbs.len_; + pos_ = rcbs.pos_; + std::memcpy(val_, rcbs.val_, len_ + pos_); } else { - if (persistent) delete[] val; - len = rcbs.len; - pos = rcbs.pos; - val = rcbs.val; + if (persistent_) + delete[] val_; + len_ = rcbs.len_; + pos_ = rcbs.pos_; + val_ = rcbs.val_; } } - persistent = rcbs.persistent; + persistent_ = rcbs.persistent_; return *this; } @@ -162,46 +169,48 @@ void BString::PersistentCopy(const BString &rcbs) { return; } - null = rcbs.null; - if (null) { - delete[] val; - val = 0; - len = 0; - pos = 0; + null_ = rcbs.null_; + if (null_) { + delete[] val_; + val_ = 0; + len_ = 0; + pos_ = 0; } else { - uint tmp_len = 
rcbs.len + rcbs.pos; - if (!persistent || tmp_len > len + pos) { - if (persistent) delete[] val; - val = new char[tmp_len]; + uint tmp_len = rcbs.len_ + rcbs.pos_; + if (!persistent_ || tmp_len > len_ + pos_) { + if (persistent_) + delete[] val_; + val_ = new char[tmp_len]; } - len = rcbs.len; - pos = rcbs.pos; - std::memcpy(val, rcbs.val, len + pos); + len_ = rcbs.len_; + pos_ = rcbs.pos_; + std::memcpy(val_, rcbs.val_, len_ + pos_); } - persistent = true; + persistent_ = true; } std::string BString::ToString() const { - if (len) return std::string(val + pos, len); + if (len_) + return std::string(val_ + pos_, len_); return std::string(); } char &BString::operator[](size_t pos) const { - DEBUG_ASSERT(pos < len); // Out of BString. Note: val is not ended by '\0'. - return val[this->pos + pos]; + DEBUG_ASSERT(pos < len_); // Out of BString. Note: val_ is not ended by '\0'. + return val_[this->pos_ + pos]; } BString &BString::operator+=(ushort pos) { - DEBUG_ASSERT((int)len - pos >= 0); - this->pos = this->pos + (ushort)pos; - this->len -= pos; + DEBUG_ASSERT((int)len_ - pos >= 0); + this->pos_ = this->pos_ + (ushort)pos; + this->len_ -= pos; return *this; } BString &BString::operator-=(ushort pos) { - DEBUG_ASSERT(pos <= this->pos); - this->pos = this->pos - (ushort)pos; - this->len += pos; + DEBUG_ASSERT(pos <= this->pos_); + this->pos_ = this->pos_ - (ushort)pos; + this->len_ += pos; return *this; } @@ -210,11 +219,11 @@ bool BString::Like(const BString &pattern, char escape_character) { BString processed_pattern; // to be used as an alternative source in case of // processed pattern (escape chars) BString processed_wildcards; - char *p = pattern.val; // a short for pattern (or processed pattern) - char *w = pattern.val; // a short for wildcard map (or na original pattern, - // if no escape chars) - char *v = val + pos; // a short for the data itself - uint pattern_len = pattern.len; + char *p = pattern.val_; // a short for pattern (or processed pattern) + char *w 
= pattern.val_; // a short for wildcard map (or na original pattern, + // if no escape chars) + char *v = val_ + pos_; // a short for the data itself + uint pattern_len = pattern.len_; // Escape characters processing bool escaped = false; @@ -248,8 +257,8 @@ bool BString::Like(const BString &pattern, char escape_character) { } } pattern_len = i; // the rest of pattern buffers are just ignored - p = processed_pattern.val; - w = processed_wildcards.val; + p = processed_pattern.val_; + w = processed_wildcards.val_; } // Pattern processing @@ -265,12 +274,12 @@ bool BString::Like(const BString &pattern, char escape_character) { cur_s_beg = cur_s; cur_p_beg = cur_p; do { // internal loop: try to match a part between %...% - while (cur_p < pattern_len && cur_s < len && // find the first match... + while (cur_p < pattern_len && cur_s < len_ && // find the first match... (v[cur_s] == p[cur_p] || w[cur_p] == '_') && w[cur_p] != '%') { cur_s++; cur_p++; } - if (cur_s < len && + if (cur_s < len_ && ((cur_p < pattern_len && w[cur_p] != '%') || cur_p >= pattern_len)) { // not matching (loop finished // prematurely) - try the next source // position @@ -279,7 +288,7 @@ bool BString::Like(const BString &pattern, char escape_character) { cur_s = ++cur_s_beg; // step forward in the source, rewind the matching // pointers } - if (cur_s == len) { // end of the source + if (cur_s == len_) { // end of the source while (cur_p < pattern_len) { if (w[cur_p] != '%') // Pattern nontrivial yet? No more chances for matching. 
return false; @@ -288,35 +297,39 @@ bool BString::Like(const BString &pattern, char escape_character) { return true; } } while (cur_p < pattern_len && w[cur_p] != '%'); // try the next match position - } while (cur_p < pattern_len && cur_s < len); + } while (cur_p < pattern_len && cur_s < len_); return true; } void BString::MakePersistent() { - if (persistent) return; - char *n_val = new char[len + pos]; - std::memcpy(n_val, val, len + pos); - val = n_val; - persistent = true; + if (persistent_) + return; + char *n_val = new char[len_ + pos_]; + std::memcpy(n_val, val_, len_ + pos_); + val_ = n_val; + persistent_ = true; } bool BString::GreaterEqThanMin(const void *txt_min) { const unsigned char *s = reinterpret_cast(txt_min); - if (null == true) return false; + if (null_ == true) + return false; uint min_len = 8; while (min_len > 0 && s[min_len - 1] == '\0') min_len--; - for (uint i = 0; i < min_len && i < len; i++) - if (((unsigned char *)val)[i + pos] < s[i]) + for (uint i = 0; i < min_len && i < len_; i++) + if (((unsigned char *)val_)[i + pos_] < s[i]) return false; - else if (((unsigned char *)val)[i + pos] > s[i]) + else if (((unsigned char *)val_)[i + pos_] > s[i]) return true; - if (len < min_len) return false; + if (len_ < min_len) + return false; return true; } bool BString::GreaterEqThanMinUTF(const void *txt_min, DTCollation col, bool use_full_len) { - if (null == true) return false; + if (null_ == true) + return false; if (RequiresUTFConversions(col)) { uint useful_len = 0; const char *s = reinterpret_cast(txt_min); @@ -326,33 +339,36 @@ bool BString::GreaterEqThanMinUTF(const void *txt_min, DTCollation col, bool use uint next_char_len, chars_included = 0; while (true) { - if (useful_len >= len || chars_included == min_charlen) break; - next_char_len = col.collation->cset->mbcharlen(col.collation, (uchar)val[useful_len + pos]); + if (useful_len >= len_ || chars_included == min_charlen) + break; + next_char_len = 
col.collation->cset->mbcharlen(col.collation, (uchar)val_[useful_len + pos_]); DEBUG_ASSERT("wide character unrecognized" && next_char_len > 0); useful_len += next_char_len; chars_included++; } } else - useful_len = len; - return col.collation->coll->strnncoll(col.collation, (uchar *)val, useful_len, + useful_len = len_; + return col.collation->coll->strnncoll(col.collation, (uchar *)val_, useful_len, reinterpret_cast(txt_min), min_byte_len, 0) >= 0; } else return GreaterEqThanMin(txt_min); } bool BString::LessEqThanMax(const void *txt_max) { - if (null == true) return false; + if (null_ == true) + return false; const unsigned char *s = reinterpret_cast(txt_max); - for (uint i = 0; i < 8 && i < len; i++) - if (((unsigned char *)val)[i + pos] > s[i]) + for (uint i = 0; i < 8 && i < len_; i++) + if (((unsigned char *)val_)[i + pos_] > s[i]) return false; - else if (((unsigned char *)val)[i + pos] < s[i]) + else if (((unsigned char *)val_)[i + pos_] < s[i]) return true; return true; } bool BString::LessEqThanMaxUTF(const void *txt_max, DTCollation col, bool use_full_len) { - if (null == true) return false; + if (null_ == true) + return false; if (RequiresUTFConversions(col)) { uint useful_len = 0; const char *s = reinterpret_cast(txt_max); @@ -362,84 +378,94 @@ bool BString::LessEqThanMaxUTF(const void *txt_max, DTCollation col, bool use_fu uint next_char_len, chars_included = 0; while (true) { - if (useful_len >= len || chars_included == max_charlen) break; - next_char_len = col.collation->cset->mbcharlen(col.collation, (uchar)val[useful_len + pos]); + if (useful_len >= len_ || chars_included == max_charlen) + break; + next_char_len = col.collation->cset->mbcharlen(col.collation, (uchar)val_[useful_len + pos_]); DEBUG_ASSERT("wide character unrecognized" && next_char_len > 0); useful_len += next_char_len; chars_included++; } } else - useful_len = len; - return col.collation->coll->strnncoll(col.collation, (uchar *)val, useful_len, + useful_len = len_; + return 
col.collation->coll->strnncoll(col.collation, (uchar *)val_, useful_len, reinterpret_cast(txt_max), max_byte_len, 0) <= 0; } else return LessEqThanMax(txt_max); } bool BString::IsEmpty() const { - if (null == true) return false; - return len == 0 ? true : false; + if (null_ == true) + return false; + return len_ == 0 ? true : false; } -bool BString::IsNullOrEmpty() const { return ((len == 0 || null) ? true : false); } +bool BString::IsNullOrEmpty() const { return ((len_ == 0 || null_) ? true : false); } bool BString::operator==(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) == 0; return CompareWith(rcdt.ToBString()) == 0; } bool BString::operator==(const BString &rcs) const { - if (null || rcs.IsNull()) return false; + if (null_ || rcs.IsNull()) + return false; return CompareWith(rcs) == 0; } bool BString::operator<(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) < 0; return CompareWith(rcdt.ToBString()) < 0; } bool BString::operator>(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) > 0; return CompareWith(rcdt.ToBString()) > 0; } bool BString::operator>=(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) >= 0; return CompareWith(rcdt.ToBString()) >= 0; } bool BString::operator<=(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == 
ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) <= 0; return CompareWith(rcdt.ToBString()) <= 0; } bool BString::operator!=(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return true; + if (null_ || rcdt.IsNull()) + return true; if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return CompareWith((BString &)rcdt) != 0; return CompareWith(rcdt.ToBString()) != 0; } uint BString::GetHashCode() const { - if (null) return 0; + if (null_) + return 0; uint hc = 0; int a = 1040021; - for (uint i = 0; i < len; i++) hc = (hc * a + val[i]) & 1048575; + for (uint i = 0; i < len_; i++) hc = (hc * a + val_[i]) & 1048575; return hc; } std::ostream &operator<<(std::ostream &out, const BString &rcbs) { - out.write(rcbs.val + rcbs.pos, rcbs.len); + out.write(rcbs.val_ + rcbs.pos_, rcbs.len_); return out; } void BString::CopyTo(void *dest, size_t count) const { - uint l = (len - pos) < count ? (len - pos) : count; - std::memcpy(dest, val + pos, l); + uint l = (len_ - pos_) < count ? (len_ - pos_) : count; + std::memcpy(dest, val_ + pos_, l); if (l <= count) std::memset((char *)dest + l, 0, count - l); } @@ -453,8 +479,9 @@ size_t BString::RoundUpTo8Bytes(const DTCollation &dt) const { if (dt.collation->mbmaxlen > 1) { int next_char_len; while (true) { - if (useful_len >= len) break; - next_char_len = dt.collation->cset->mbcharlen(dt.collation, (uchar)val[useful_len + pos]); + if (useful_len >= len_) + break; + next_char_len = dt.collation->cset->mbcharlen(dt.collation, (uchar)val_[useful_len + pos_]); if (next_char_len == 0) { TIANMU_LOG(LogCtl_Level::WARN, "RoundUpTo8Bytes() detect non-UTF8 character"); @@ -466,7 +493,7 @@ size_t BString::RoundUpTo8Bytes(const DTCollation &dt) const { useful_len += next_char_len; } } else - useful_len = len > 8 ? 8 : len; + useful_len = len_ > 8 ? 
8 : len_; return useful_len; } diff --git a/storage/tianmu/types/rc_data_types.h b/storage/tianmu/types/rc_data_types.h index d8ac5b0f6..c16822ffd 100644 --- a/storage/tianmu/types/rc_data_types.h +++ b/storage/tianmu/types/rc_data_types.h @@ -127,7 +127,7 @@ enum class ValueTypeEnum { NULL_TYPE, DATE_TIME_TYPE, NUMERIC_TYPE, STRING_TYPE class RCDataType { public: - RCDataType() : null(true) {} + RCDataType() : null_(true) {} virtual ~RCDataType(); public: @@ -147,13 +147,13 @@ class RCDataType { virtual bool operator<=(const RCDataType &rcdt) const = 0; virtual bool operator!=(const RCDataType &rcdt) const = 0; - bool IsNull() const { return null; } + bool IsNull() const { return null_; } virtual uint GetHashCode() const = 0; virtual char *GetDataBytesPointer() const = 0; - void SetToNull() { null = true; } - bool null; + void SetToNull() { null_ = true; } + bool null_; public: static ValueTypeEnum GetValueType(common::CT attr_type); @@ -168,15 +168,15 @@ class RCDataType { template class ValueBasic : public RCDataType { public: - ValueTypeEnum GetValueType() const override { return T::value_type; } + ValueTypeEnum GetValueType() const override { return T::value_type_; } std::unique_ptr Clone() const override { return std::unique_ptr(new T((T &)*this)); }; - static T null_value; - static T &NullValue() { return T::null_value; } + static T null_value_; + static T &NullValue() { return T::null_value_; } using RCDataType::operator=; }; template -T ValueBasic::null_value; +T ValueBasic::null_value_; using CondArray = std::vector; @@ -187,16 +187,16 @@ class BString : public ValueBasic { public: BString(); BString(const char *val, size_t len = 0, bool materialize = false); - // len == -1 or -2 => the length is stored on the first 2 or 4 (respectively, - // ushort / int) bytes of val. len == 0 => the length is a result of - // std::strlen(val), i.e. 
val is 0-terminated zero-term = true => this is a - // non-null empty string, or a longer zero-terminated string + // len == -1 or -2 => the length is stored on the first 2 or 4 (respectively, + // ushort / int) bytes of val. len == 0 => the length is a result of + // std::strlen(val), i.e. val is 0-terminated zero-term = true => this is a + // non-null empty string, or a longer zero-terminated string BString(const BString &rcbs); ~BString(); BString &operator=(const BString &rcbs); BString &operator=(const RCDataType &rcn) override; - void PersistentCopy(const BString &rcbs); // like "=", but makes this persistent + void PersistentCopy(const BString &rcbs); // like "=", but makes this persistent static bool Parse(BString &in, BString &out); common::CT Type() const override; @@ -204,15 +204,15 @@ class BString : public ValueBasic { void PutString(char *&dest, ushort len, bool move_ptr = true) const; void PutVarchar(char *&dest, uchar prefixlen, bool move_ptr) const; void MakePersistent(); - bool IsPersistent() const { return persistent; } - bool IsEmpty() const; // return true if this is 0 len string, if this is null + bool IsPersistent() const { return persistent_; } + bool IsEmpty() const; // return true if this is 0 len string, if this is null // this function will return false - bool IsNullOrEmpty() const; // return true if this is null or this is 0 len string + bool IsNullOrEmpty() const; // return true if this is null or this is 0 len string std::string ToString() const; BString ToBString() const override { return *this; } - char *GetDataBytesPointer() const override { return val + pos; } + char *GetDataBytesPointer() const override { return val_ + pos_; } char *begin() const { return GetDataBytesPointer(); } - char *end() const { return begin() + len; } + char *end() const { return begin() + len_; } BString &operator+=(ushort pos); BString &operator-=(ushort pos); @@ -229,32 +229,36 @@ class BString : public ValueBasic { // this is fast for string 
literal bool Equals(const char *s, uint l) const { - if (l != len) return false; - return std::memcmp(s, val, l) == 0; + if (l != len_) + return false; + return std::memcmp(s, val_, l) == 0; } int CompareWith(const BString &rcbs2) const { - int l = std::min(len, rcbs2.len); + int l = std::min(len_, rcbs2.len_); if (l == 0) { - if (len == 0 && rcbs2.len == 0) return 0; + if (len_ == 0 && rcbs2.len_ == 0) + return 0; - if (len == 0) return -1; + if (len_ == 0) + return -1; return 1; } - if (len != rcbs2.len) { - int ret = std::memcmp(val + pos, rcbs2.val + rcbs2.pos, l); + if (len_ != rcbs2.len_) { + int ret = std::memcmp(val_ + pos_, rcbs2.val_ + rcbs2.pos_, l); if (ret == 0) { - if (len < rcbs2.len) return -1; + if (len_ < rcbs2.len_) + return -1; return 1; } return ret; } // equal length - return std::memcmp(val + pos, rcbs2.val + rcbs2.pos, l); + return std::memcmp(val_ + pos_, rcbs2.val_ + rcbs2.pos_, l); } // Wildcards: "_" is any character, "%" is 0 or more characters @@ -266,18 +270,18 @@ class BString : public ValueBasic { bool LessEqThanMaxUTF(const void *txt_max, DTCollation col, bool use_full_len = false); uint GetHashCode() const override; - size_t size() const { return len; } + size_t size() const { return len_; } char &operator[](size_t pos) const; - char *val; - uint len; - uint pos; + char *val_; + uint len_; + uint pos_; private: - bool persistent; + bool persistent_; public: - const static ValueTypeEnum value_type = ValueTypeEnum::STRING_TYPE; + const static ValueTypeEnum value_type_ = ValueTypeEnum::STRING_TYPE; }; class RCDateTime : public ValueBasic { @@ -300,7 +304,7 @@ class RCDateTime : public ValueBasic { RCDateTime &operator=(const RCDataType &rcdt) override; RCDateTime &Assign(int64_t v, common::CT at); - void Store(MYSQL_TIME *my_time, enum_mysql_timestamp_type t) { dt.Store(my_time, t); } + void Store(MYSQL_TIME *my_time, enum_mysql_timestamp_type t) { dt_.Store(my_time, t); } bool IsZero() const; int64_t GetInt64() const; bool 
GetInt64(int64_t &value) const { @@ -317,7 +321,7 @@ class RCDateTime : public ValueBasic { * \return false if it is NULL, true otherwise */ bool ToInt64(int64_t &value) const; - char *GetDataBytesPointer() const override { return (char *)&dt; } + char *GetDataBytesPointer() const override { return (char *)&dt_; } BString ToBString() const override; common::CT Type() const override; uint GetHashCode() const override; @@ -330,20 +334,21 @@ class RCDateTime : public ValueBasic { bool operator!=(const RCDataType &rcdt) const override; int64_t operator-(const RCDateTime &sec) const; // difference in days, only for common::CT::DATE - short Year() const { return dt.year; } - short Month() const { return dt.month; } - short Day() const { return dt.day; } + short Year() const { return dt_.year; } + short Month() const { return dt_.month; } + short Day() const { return dt_.day; } short Hour() const { - if (at != common::CT::TIME) return dt.hour; - return dt.time_hour; + if (at_ != common::CT::TIME) + return dt_.hour; + return dt_.time_hour; } - short Minute() const { return dt.minute; } - short Second() const { return dt.second; } - int MicroSecond() const { return dt.microsecond; } + short Minute() const { return dt_.minute; } + short Second() const { return dt_.second; } + int MicroSecond() const { return dt_.microsecond; } private: - DT dt{}; - common::CT at; + DT dt_{}; + common::CT at_; private: int compare(const RCDateTime &rcdt) const; @@ -379,7 +384,7 @@ class RCDateTime : public ValueBasic { static RCDateTime GetCurrent(); public: - const static ValueTypeEnum value_type = ValueTypeEnum::DATE_TIME_TYPE; + const static ValueTypeEnum value_type_ = ValueTypeEnum::DATE_TIME_TYPE; }; class RCValueObject { @@ -409,27 +414,27 @@ class RCValueObject { bool IsNull() const; - common::CT Type() const { return value.get() ? value->Type() : common::CT::UNK; } - ValueTypeEnum GetValueType() const { return value.get() ? 
value->GetValueType() : ValueTypeEnum::NULL_TYPE; } + common::CT Type() const { return value_.get() ? value_->Type() : common::CT::UNK; } + ValueTypeEnum GetValueType() const { return value_.get() ? value_->GetValueType() : ValueTypeEnum::NULL_TYPE; } BString ToBString() const; - // operator RCDataType*() { return value.get(); } - RCDataType *Get() const { return value.get(); } + // operator RCDataType*() { return value_.get(); } + RCDataType *Get() const { return value_.get(); } RCDataType &operator*() const; - // RCDataType& operator*() const { DEBUG_ASSERT(value.get()); return - // *value; } + // RCDataType& operator*() const { DEBUG_ASSERT(value_.get()); return + // *value_; } operator RCNum &() const; // operator BString&() const; operator RCDateTime &() const; uint GetHashCode() const; - char *GetDataBytesPointer() const { return value->GetDataBytesPointer(); } + char *GetDataBytesPointer() const { return value_->GetDataBytesPointer(); } private: inline void construct(const RCDataType &rcdt); protected: - std::unique_ptr value; + std::unique_ptr value_; public: static bool compare(const RCValueObject &rcvo1, const RCValueObject &rcvo2, common::Operator op, char like_esc); @@ -457,22 +462,22 @@ class rc_hash_compare { */ static inline void ConvertToBinaryForm(const BString &src, BString &dst, DTCollation coll) { if (!src.IsNull()) { - coll.collation->coll->strnxfrm(coll.collation, (uchar *)dst.val, dst.len, dst.len, (uchar *)(src.val), src.len, + coll.collation->coll->strnxfrm(coll.collation, (uchar *)dst.val_, dst.len_, dst.len_, (uchar *)(src.val_), src.len_, MY_STRXFRM_PAD_WITH_SPACE); - dst.null = false; + dst.null_ = false; } else { - dst.null = true; + dst.null_ = true; } } static int inline CollationStrCmp(DTCollation coll, const BString &s1, const BString &s2) { - return coll.collation->coll->strnncoll(coll.collation, (const uchar *)s1.val, s1.len, (const uchar *)s2.val, s2.len, - 0); + return coll.collation->coll->strnncoll(coll.collation, (const 
uchar *)s1.val_, s1.len_, (const uchar *)s2.val_, + s2.len_, 0); } static bool inline CollationStrCmp(DTCollation coll, const BString &s1, const BString &s2, common::Operator op) { - int res = - coll.collation->coll->strnncoll(coll.collation, (const uchar *)s1.val, s1.len, (const uchar *)s2.val, s2.len, 0); + int res = coll.collation->coll->strnncoll(coll.collation, (const uchar *)s1.val_, s1.len_, (const uchar *)s2.val_, + s2.len_, 0); switch (op) { case common::Operator::O_EQ: return (res == 0); diff --git a/storage/tianmu/types/rc_datetime.cpp b/storage/tianmu/types/rc_datetime.cpp index 9cc6a1e3e..8e4de8471 100644 --- a/storage/tianmu/types/rc_datetime.cpp +++ b/storage/tianmu/types/rc_datetime.cpp @@ -27,79 +27,79 @@ namespace Tianmu { namespace types { static_assert(sizeof(DT) == 8); -RCDateTime::RCDateTime(int64_t v, common::CT at) : at(at) { - null = (v == common::NULL_VALUE_64); - if (!null) { - *(int64_t *)&dt = v; +RCDateTime::RCDateTime(int64_t v, common::CT at) : at_(at) { + null_ = (v == common::NULL_VALUE_64); + if (!null_) { + *(int64_t *)&dt_ = v; if (at == common::CT::DATE) { - dt.second = 0; - dt.minute = 0; - dt.hour = 0; + dt_.second = 0; + dt_.minute = 0; + dt_.hour = 0; } else if (at == common::CT::TIME) { - dt.day = 0; - dt.month = 0; - dt.year = 0; + dt_.day = 0; + dt_.month = 0; + dt_.year = 0; } } } RCDateTime::RCDateTime(short year) { - at = common::CT::YEAR; - null = false; + at_ = common::CT::YEAR; + null_ = false; if (year == common::NULL_VALUE_SH) - null = true; + null_ = true; else { - dt.year = std::abs(year); + dt_.year = std::abs(year); } } -RCDateTime::RCDateTime(short yh, short mm, short ds, common::CT at) : at(at) { - null = false; +RCDateTime::RCDateTime(short yh, short mm, short ds, common::CT at) : at_(at) { + null_ = false; if (at == common::CT::DATE) { - dt.day = std::abs(ds); - dt.month = std::abs(mm); - dt.year = std::abs(yh); + dt_.day = std::abs(ds); + dt_.month = std::abs(mm); + dt_.year = std::abs(yh); } else if (at 
== common::CT::TIME) { - dt.second = std::abs(ds); - dt.minute = std::abs(mm); - dt.time_hour = std::abs(yh); + dt_.second = std::abs(ds); + dt_.minute = std::abs(mm); + dt_.time_hour = std::abs(yh); if (yh < 0 || mm < 0 || ds < 0) - dt.neg = 1; + dt_.neg = 1; } else TIANMU_ERROR("type not supported"); } RCDateTime::RCDateTime(short year, short month, short day, short hour, short minute, short second, common::CT at) - : at(at) { + : at_(at) { ASSERT(at == common::CT::DATETIME || at == common::CT::TIMESTAMP || at == common::CT::DATE, "should be 'at == common::CT::DATETIME || at == common::CT::TIMESTAMP || at == common::CT::DATE'"); - null = false; - dt.year = std::abs(year); - dt.month = std::abs(month); - dt.day = std::abs(day); - dt.hour = std::abs(hour); - dt.minute = std::abs(minute); - dt.second = std::abs(second); + null_ = false; + dt_.year = std::abs(year); + dt_.month = std::abs(month); + dt_.day = std::abs(day); + dt_.hour = std::abs(hour); + dt_.minute = std::abs(minute); + dt_.second = std::abs(second); } RCDateTime::RCDateTime(const MYSQL_TIME &myt, common::CT at) { ASSERT(at == common::CT::DATETIME || at == common::CT::TIMESTAMP || at == common::CT::DATE, "should be 'at == common::CT::DATETIME || at == common::CT::TIMESTAMP || common::CT::DATE'"); - null = false; + null_ = false; - dt.year = myt.year; - dt.month = myt.month; - dt.day = myt.day; - dt.hour = myt.hour; - dt.minute = myt.minute; - dt.second = myt.second; - dt.microsecond = myt.second_part; - dt.neg = myt.neg; + dt_.year = myt.year; + dt_.month = myt.month; + dt_.day = myt.day; + dt_.hour = myt.hour; + dt_.minute = myt.minute; + dt_.second = myt.second; + dt_.microsecond = myt.second_part; + dt_.neg = myt.neg; } -RCDateTime::RCDateTime(RCNum &rcn, common::CT at) : at(at) { - null = rcn.null; - if (!null) { +RCDateTime::RCDateTime(RCNum &rcn, common::CT at) : at_(at) { + null_ = rcn.null_; + if (!null_) { if (core::ATI::IsRealType(rcn.Type())) throw 
common::DataTypeConversionException(common::TianmuError(common::ErrorCode::DATACONVERSION)); if (rcn.Type() == common::CT::NUM && rcn.Scale() > 0) @@ -114,9 +114,9 @@ RCDateTime::RCDateTime(const RCDateTime &rcdt) : ValueBasic(rcdt) { RCDateTime::~RCDateTime() {} RCDateTime &RCDateTime::operator=(const RCDateTime &rcv) { - *(int64_t *)&dt = *(int64_t *)&rcv.dt; - this->at = rcv.at; - this->null = rcv.null; + *(int64_t *)&dt_ = *(int64_t *)&rcv.dt_; + this->at_ = rcv.at_; + this->null_ = rcv.null_; return *this; } @@ -125,38 +125,38 @@ RCDateTime &RCDateTime::operator=(const RCDataType &rcv) { *this = (RCDateTime &)rcv; else { TIANMU_ERROR("bad cast"); - null = true; + null_ = true; } return *this; } RCDateTime &RCDateTime::Assign(int64_t v, common::CT at) { - this->at = at; - null = (v == common::NULL_VALUE_64); - if (null) - *(int64_t *)&dt = 0; + this->at_ = at; + null_ = (v == common::NULL_VALUE_64); + if (null_) + *(int64_t *)&dt_ = 0; else - *(int64_t *)&dt = v; + *(int64_t *)&dt_ = v; return *this; } int64_t RCDateTime::GetInt64() const { - if (null) + if (null_) return common::NULL_VALUE_64; - return *(int64_t *)&dt; + return *(int64_t *)&dt_; } bool RCDateTime::ToInt64(int64_t &value) const { if (!IsNull()) { - if (at == common::CT::YEAR) { - value = (int)dt.year; + if (at_ == common::CT::YEAR) { + value = (int)dt_.year; return true; - } else if (at == common::CT::DATE) { + } else if (at_ == common::CT::DATE) { value = Year() * 10000 + Month() * 100 + Day(); return true; - } else if (at == common::CT::TIME) { - value = dt.time_hour * 10000 + Minute() * 100 + Second(); - if (dt.Neg()) + } else if (at_ == common::CT::TIME) { + value = dt_.time_hour * 10000 + Minute() * 100 + Second(); + if (dt_.Neg()) value = -value; return true; } else { @@ -175,21 +175,21 @@ bool RCDateTime::IsZero() const { return *this == GetSpecialValue(Type()); } BString RCDateTime::ToBString() const { if (!IsNull()) { BString rcs(0, 30, true); - char *buf = rcs.val; - if (dt.Neg()) + 
char *buf = rcs.val_; + if (dt_.Neg()) *buf++ = '-'; - if (at == common::CT::YEAR) { + if (at_ == common::CT::YEAR) { std::sprintf(buf, "%04d", (int)std::abs(Year())); - } else if (at == common::CT::DATE) { + } else if (at_ == common::CT::DATE) { std::sprintf(buf, "%04d-%02d-%02d", (int)std::abs(Year()), (int)std::abs(Month()), (int)std::abs(Day())); - } else if (at == common::CT::TIME) { - std::sprintf(buf, "%02d:%02d:%02d", (int)dt.time_hour, (int)Minute(), (int)Second()); - } else if (at == common::CT::DATETIME || at == common::CT::TIMESTAMP) { + } else if (at_ == common::CT::TIME) { + std::sprintf(buf, "%02d:%02d:%02d", (int)dt_.time_hour, (int)Minute(), (int)Second()); + } else if (at_ == common::CT::DATETIME || at_ == common::CT::TIMESTAMP) { std::sprintf(buf, "%04d-%02d-%02d %02d:%02d:%02d.%06d", (int)std::abs(Year()), (int)std::abs(Month()), (int)std::abs(Day()), (int)Hour(), (int)Minute(), (int)Second(), (int)MicroSecond()); } else TIANMU_ERROR("type not supported"); - rcs.len = (uint)std::strlen(rcs.val); + rcs.len_ = (uint)std::strlen(rcs.val_); return rcs; } return BString(); @@ -205,18 +205,18 @@ common::ErrorCode RCDateTime::Parse(const int64_t &v, RCDateTime &rcv, common::C if (v < 0) sign = -1; - rcv.at = at; + rcv.at_ = at; if (v == common::NULL_VALUE_64) { - rcv.null = true; + rcv.null_ = true; return common::ErrorCode::SUCCESS; } else - rcv.null = false; + rcv.null_ = false; if (at == common::CT::YEAR) { uint vv = (uint)v; vv = ToCorrectYear(vv, at, (precision >= 0 && precision < 4)); if (IsCorrectTIANMUYear((short)vv)) { - rcv.dt.year = (short)vv; + rcv.dt_.year = (short)vv; return common::ErrorCode::SUCCESS; } } else if (at == common::CT::DATE) { @@ -224,13 +224,13 @@ common::ErrorCode RCDateTime::Parse(const int64_t &v, RCDateTime &rcv, common::C rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.day = tmp_v % 100; + rcv.dt_.day = tmp_v % 100; tmp_v /= 100; if (!CanBeMonth(tmp_v % 100)) { rcv = GetSpecialValue(at); 
return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.month = tmp_v % 100; + rcv.dt_.month = tmp_v % 100; tmp_v /= 100; uint vv = uint(tmp_v); vv = ToCorrectYear(vv, at); @@ -238,21 +238,21 @@ common::ErrorCode RCDateTime::Parse(const int64_t &v, RCDateTime &rcv, common::C rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.year = vv; - if (sign == 1 && IsCorrectTIANMUDate(short(rcv.dt.year), short(rcv.dt.month), short(rcv.dt.day))) + rcv.dt_.year = vv; + if (sign == 1 && IsCorrectTIANMUDate(short(rcv.dt_.year), short(rcv.dt_.month), short(rcv.dt_.day))) return common::ErrorCode::SUCCESS; } else if (at == common::CT::TIME) { if (!CanBeSecond(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.second = tmp_v % 100; + rcv.dt_.second = tmp_v % 100; tmp_v /= 100; if (!CanBeMinute(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.minute = tmp_v % 100; + rcv.dt_.minute = tmp_v % 100; tmp_v /= 100; if ((tmp_v * sign) > RC_TIME_MAX.Hour()) { @@ -263,11 +263,11 @@ common::ErrorCode RCDateTime::Parse(const int64_t &v, RCDateTime &rcv, common::C return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.hour = tmp_v; + rcv.dt_.hour = tmp_v; - if (IsCorrectTIANMUTime(short(rcv.dt.hour * sign), short(rcv.dt.minute * sign), short(rcv.dt.second * sign))) { + if (IsCorrectTIANMUTime(short(rcv.dt_.hour * sign), short(rcv.dt_.minute * sign), short(rcv.dt_.second * sign))) { if (sign == -1) - rcv.dt.neg = 1; + rcv.dt_.neg = 1; return common::ErrorCode::SUCCESS; } else { rcv = RC_TIME_SPEC; @@ -279,44 +279,44 @@ common::ErrorCode RCDateTime::Parse(const int64_t &v, RCDateTime &rcv, common::C rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.second = tmp_v % 100; + rcv.dt_.second = tmp_v % 100; tmp_v /= 100; if (!CanBeMinute(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.minute = tmp_v % 100; + rcv.dt_.minute = 
tmp_v % 100; tmp_v /= 100; if (!CanBeHour(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.hour = tmp_v % 100; + rcv.dt_.hour = tmp_v % 100; tmp_v /= 100; } if (!CanBeDay(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.day = tmp_v % 100; + rcv.dt_.day = tmp_v % 100; tmp_v /= 100; if (!CanBeMonth(tmp_v % 100)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.month = tmp_v % 100; + rcv.dt_.month = tmp_v % 100; tmp_v /= 100; if (!CanBeYear(tmp_v)) { rcv = GetSpecialValue(at); return common::ErrorCode::OUT_OF_RANGE; } - rcv.dt.year = RCDateTime::ToCorrectYear((uint)tmp_v, at); + rcv.dt_.year = RCDateTime::ToCorrectYear((uint)tmp_v, at); if (sign == 1 && at == common::CT::DATETIME && - IsCorrectTIANMUDatetime(rcv.dt.year, rcv.dt.month, rcv.dt.day, rcv.dt.hour, rcv.dt.minute, rcv.dt.second)) + IsCorrectTIANMUDatetime(rcv.dt_.year, rcv.dt_.month, rcv.dt_.day, rcv.dt_.hour, rcv.dt_.minute, rcv.dt_.second)) return common::ErrorCode::SUCCESS; if (sign == 1 && at == common::CT::TIMESTAMP && - IsCorrectTIANMUTimestamp(short(rcv.dt.year), short(rcv.dt.month), short(rcv.dt.day), short(rcv.dt.hour), - short(rcv.dt.minute), short(rcv.dt.second))) + IsCorrectTIANMUTimestamp(short(rcv.dt_.year), short(rcv.dt_.month), short(rcv.dt_.day), short(rcv.dt_.hour), + short(rcv.dt_.minute), short(rcv.dt_.second))) return common::ErrorCode::SUCCESS; } else TIANMU_ERROR("type not supported"); @@ -529,7 +529,7 @@ RCDateTime RCDateTime::GetCurrent() { } bool RCDateTime::operator==(const RCDataType &rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) == 0; @@ -540,7 +540,7 @@ bool RCDateTime::operator==(const RCDataType &rcv) const { } bool RCDateTime::operator<(const RCDataType 
&rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) < 0; @@ -550,7 +550,7 @@ bool RCDateTime::operator<(const RCDataType &rcv) const { } bool RCDateTime::operator>(const RCDataType &rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) > 0; @@ -560,7 +560,7 @@ bool RCDateTime::operator>(const RCDataType &rcv) const { } bool RCDateTime::operator>=(const RCDataType &rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) >= 0; @@ -570,7 +570,7 @@ bool RCDateTime::operator>=(const RCDataType &rcv) const { } bool RCDateTime::operator<=(const RCDataType &rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) <= 0; @@ -580,7 +580,7 @@ bool RCDateTime::operator<=(const RCDataType &rcv) const { } bool RCDateTime::operator!=(const RCDataType &rcv) const { - if (!AreComparable(at, rcv.Type()) || IsNull() || rcv.IsNull()) + if (!AreComparable(at_, rcv.Type()) || IsNull() || rcv.IsNull()) return false; if (rcv.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return compare((RCDateTime &)rcv) != 0; @@ -590,57 +590,57 @@ bool RCDateTime::operator!=(const RCDataType &rcv) const { } int64_t RCDateTime::operator-(const RCDateTime &sec) const { - if (at != common::CT::DATE || sec.at != common::CT::DATE || 
IsNull() || sec.IsNull()) + if (at_ != common::CT::DATE || sec.at_ != common::CT::DATE || IsNull() || sec.IsNull()) return common::NULL_VALUE_64; int64_t result = 0; // span in days for [sec., ..., this] bool notless_than_sec = (this->operator>(sec)); if (notless_than_sec) { - if (dt.year == sec.dt.year) { - if (dt.month == sec.dt.month) { - result = dt.day - sec.dt.day; + if (dt_.year == sec.dt_.year) { + if (dt_.month == sec.dt_.month) { + result = dt_.day - sec.dt_.day; } else { - for (unsigned int i = sec.dt.month + 1; i < dt.month; i++) result += NoDaysInMonth(dt.year, i); - result += NoDaysInMonth(sec.dt.year, sec.dt.month) - sec.dt.day + 1; - result += dt.day - 1; + for (unsigned int i = sec.dt_.month + 1; i < dt_.month; i++) result += NoDaysInMonth(dt_.year, i); + result += NoDaysInMonth(sec.dt_.year, sec.dt_.month) - sec.dt_.day + 1; + result += dt_.day - 1; } } else { - for (int i = int(sec.dt.year) + 1; i < dt.year; i++) result += (IsLeapYear(i) ? 366 : 365); - for (int i = int(sec.dt.month) + 1; i <= 12; i++) result += NoDaysInMonth(sec.dt.year, i); - for (unsigned int i = 1; i < dt.month; i++) result += NoDaysInMonth(dt.year, i); - result += NoDaysInMonth(sec.dt.year, sec.dt.month) - sec.dt.day + 1; - result += dt.day - 1; + for (int i = int(sec.dt_.year) + 1; i < dt_.year; i++) result += (IsLeapYear(i) ? 
366 : 365); + for (int i = int(sec.dt_.month) + 1; i <= 12; i++) result += NoDaysInMonth(sec.dt_.year, i); + for (unsigned int i = 1; i < dt_.month; i++) result += NoDaysInMonth(dt_.year, i); + result += NoDaysInMonth(sec.dt_.year, sec.dt_.month) - sec.dt_.day + 1; + result += dt_.day - 1; } } else { - if (dt.year == sec.dt.year) { - if (dt.month == sec.dt.month) { - result = sec.dt.day - dt.day; + if (dt_.year == sec.dt_.year) { + if (dt_.month == sec.dt_.month) { + result = sec.dt_.day - dt_.day; } else { - for (int i = int(dt.month) + 1; i < sec.dt.month; i++) result += NoDaysInMonth(sec.dt.year, i); - result += NoDaysInMonth(dt.year, dt.month) - dt.day + 1; - result += sec.dt.day - 1; + for (int i = int(dt_.month) + 1; i < sec.dt_.month; i++) result += NoDaysInMonth(sec.dt_.year, i); + result += NoDaysInMonth(dt_.year, dt_.month) - dt_.day + 1; + result += sec.dt_.day - 1; } } else { - for (unsigned int i = (dt.year) + 1; i < sec.dt.year; i++) result += (IsLeapYear(i) ? 366 : 365); - for (int i = int(dt.month) + 1; i <= 12; i++) result += NoDaysInMonth(dt.year, i); - for (unsigned int i = 1; i < sec.dt.month; i++) result += NoDaysInMonth(sec.dt.year, i); - result += NoDaysInMonth(dt.year, dt.month) - dt.day + 1; - result += sec.dt.day - 1; + for (unsigned int i = (dt_.year) + 1; i < sec.dt_.year; i++) result += (IsLeapYear(i) ? 366 : 365); + for (int i = int(dt_.month) + 1; i <= 12; i++) result += NoDaysInMonth(dt_.year, i); + for (unsigned int i = 1; i < sec.dt_.month; i++) result += NoDaysInMonth(sec.dt_.year, i); + result += NoDaysInMonth(dt_.year, dt_.month) - dt_.day + 1; + result += sec.dt_.day - 1; } } return notless_than_sec ? 
result : -result; } -common::CT RCDateTime::Type() const { return at; } +common::CT RCDateTime::Type() const { return at_; } uint RCDateTime::GetHashCode() const { - uint64_t v = *(uint64_t *)&dt; + uint64_t v = *(uint64_t *)&dt_; return (uint)(v >> 32) + (uint)(v) /*+ *(short*)&tz*/; } int RCDateTime::compare(const RCDateTime &rcv) const { - int64_t v1 = *(int64_t *)&dt; - int64_t v2 = *(int64_t *)&rcv.dt; + int64_t v1 = *(int64_t *)&dt_; + int64_t v2 = *(int64_t *)&rcv.dt_; return (v1 < v2 ? -1 : (v1 > v2 ? 1 : 0)); } @@ -689,7 +689,7 @@ void RCDateTime::AdjustTimezone(RCDateTime &dt) { // UTC time stored on server dt = RCDateTime((utc_t.tm_year + 1900) % 10000, utc_t.tm_mon + 1, utc_t.tm_mday, utc_t.tm_hour, utc_t.tm_min, utc_t.tm_sec, common::CT::TIMESTAMP); - dt.dt.microsecond = t.second_part; + dt.dt_.microsecond = t.second_part; } } } // namespace types diff --git a/storage/tianmu/types/rc_num.cpp b/storage/tianmu/types/rc_num.cpp index 9de7f0e76..4425baedc 100644 --- a/storage/tianmu/types/rc_num.cpp +++ b/storage/tianmu/types/rc_num.cpp @@ -33,12 +33,12 @@ RCNum::RCNum(common::CT attrt) : value_(0), scale_(0), is_double_(false), is_dot RCNum::RCNum(int64_t value_, short scale, bool is_double_, common::CT attrt) { Assign(value_, scale, is_double_, attrt); } RCNum::RCNum(double value_) : value_(*(int64_t *)&value_), scale_(0), is_double_(true), is_dot_(false), attr_type_(common::CT::REAL) { - null = (value_ == NULL_VALUE_D ? true : false); + null_ = (value_ == NULL_VALUE_D ? 
true : false); } RCNum::RCNum(const RCNum &rcn) : ValueBasic(rcn), value_(rcn.value_), scale_(rcn.scale_), is_double_(rcn.is_double_), is_dot_(rcn.is_dot_), attr_type_(rcn.attr_type_) { - null = rcn.null; + null_ = rcn.null_; } RCNum::~RCNum() {} @@ -60,9 +60,9 @@ RCNum &RCNum::Assign(int64_t value_, short scale, bool is_double_, common::CT at if (!(this->attr_type_ == common::CT::REAL || this->attr_type_ == common::CT::FLOAT)) this->attr_type_ = common::CT::REAL; this->is_dot_ = false; scale_ = 0; - null = (value_ == *(int64_t *)&NULL_VALUE_D ? true : false); + null_ = (value_ == *(int64_t *)&NULL_VALUE_D ? true : false); } else - null = (value_ == common::NULL_VALUE_64 ? true : false); + null_ = (value_ == common::NULL_VALUE_64 ? true : false); return *this; } @@ -73,7 +73,7 @@ RCNum &RCNum::Assign(double value_) { this->is_dot_ = false; this->attr_type_ = common::CT::REAL; common::double_int_t v(value_); - null = (v.i == common::NULL_VALUE_64 ? true : false); + null_ = (v.i == common::NULL_VALUE_64 ? 
true : false); return *this; } @@ -93,7 +93,7 @@ RCNum &RCNum::operator=(const RCNum &rcn) { value_ = rcn.value_; is_double_ = rcn.is_double_; scale_ = rcn.scale_; - null = rcn.null; + null_ = rcn.null_; attr_type_ = rcn.attr_type_; return *this; } @@ -107,7 +107,7 @@ RCNum &RCNum::operator=(const RCDataType &rcdt) { *this = rcn1; } else { TIANMU_ERROR("Unsupported assign operation!"); - null = true; + null_ = true; } } return *this; @@ -250,7 +250,8 @@ RCNum::operator double() const { } bool RCNum::operator==(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::NUMERIC_TYPE) return (compare((RCNum &)rcdt) == 0); if (rcdt.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return (compare((RCDateTime &)rcdt) == 0); if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return (rcdt == this->ToBString()); @@ -259,7 +260,8 @@ bool RCNum::operator==(const RCDataType &rcdt) const { } bool RCNum::operator!=(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::NUMERIC_TYPE) return (compare((RCNum &)rcdt) != 0); if (rcdt.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return (compare((RCDateTime &)rcdt) != 0); if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return (rcdt != this->ToBString()); @@ -295,7 +297,8 @@ bool RCNum::operator<=(const RCDataType &rcdt) const { } bool RCNum::operator>=(const RCDataType &rcdt) const { - if (null || rcdt.IsNull()) return false; + if (null_ || rcdt.IsNull()) + return false; if (rcdt.GetValueType() == ValueTypeEnum::NUMERIC_TYPE) return (compare((RCNum &)rcdt) >= 0); if (rcdt.GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return (compare((RCDateTime &)rcdt) >= 0); if (rcdt.GetValueType() == ValueTypeEnum::STRING_TYPE) return (this->ToBString() >= rcdt); @@ -304,7 +307,7 @@ bool RCNum::operator>=(const RCDataType 
&rcdt) const { } RCNum &RCNum::operator-=(const RCNum &rcn) { - DEBUG_ASSERT(!null); + DEBUG_ASSERT(!null_); if (rcn.IsNull() || rcn.IsNull()) return *this; if (IsReal() || rcn.IsReal()) { if (IsReal() && rcn.IsReal()) @@ -327,7 +330,7 @@ RCNum &RCNum::operator-=(const RCNum &rcn) { } RCNum &RCNum::operator+=(const RCNum &rcn) { - DEBUG_ASSERT(!null); + DEBUG_ASSERT(!null_); if (rcn.IsNull() || rcn.IsNull()) return *this; if (IsReal() || rcn.IsReal()) { if (IsReal() && rcn.IsReal()) @@ -350,7 +353,7 @@ RCNum &RCNum::operator+=(const RCNum &rcn) { } RCNum &RCNum::operator*=(const RCNum &rcn) { - DEBUG_ASSERT(!null); + DEBUG_ASSERT(!null_); if (rcn.IsNull() || rcn.IsNull()) return *this; if (IsReal() || rcn.IsReal()) { if (IsReal() && rcn.IsReal()) @@ -386,7 +389,7 @@ void fcvt(char *buf_, double val_, int digits_, int *dec_, int *sign_) { } RCNum &RCNum::operator/=(const RCNum &rcn) { - DEBUG_ASSERT(!null); + DEBUG_ASSERT(!null_); if (rcn.IsNull() || rcn.IsNull()) return *this; if (IsReal() || rcn.IsReal()) { if (IsReal() && rcn.IsReal()) diff --git a/storage/tianmu/types/rc_num.h b/storage/tianmu/types/rc_num.h index ec3f293eb..7bbfb73cb 100644 --- a/storage/tianmu/types/rc_num.h +++ b/storage/tianmu/types/rc_num.h @@ -107,7 +107,7 @@ class RCNum : public ValueBasic { common::CT attr_type_; public: - const static ValueTypeEnum value_type = ValueTypeEnum::NUMERIC_TYPE; + const static ValueTypeEnum value_type_ = ValueTypeEnum::NUMERIC_TYPE; }; } // namespace types diff --git a/storage/tianmu/types/rc_value_object.cpp b/storage/tianmu/types/rc_value_object.cpp index 3374dc849..a949b0a80 100644 --- a/storage/tianmu/types/rc_value_object.cpp +++ b/storage/tianmu/types/rc_value_object.cpp @@ -23,7 +23,8 @@ namespace types { RCValueObject::RCValueObject() {} RCValueObject::RCValueObject(const RCValueObject &rcvo) { - if (rcvo.value.get()) construct(*rcvo.value); + if (rcvo.value_.get()) + construct(*rcvo.value_); } RCValueObject::RCValueObject(const RCDataType &rcdt) { 
construct(rcdt); } @@ -31,21 +32,21 @@ RCValueObject::RCValueObject(const RCDataType &rcdt) { construct(rcdt); } RCValueObject::~RCValueObject() {} RCValueObject &RCValueObject::operator=(const RCValueObject &rcvo) { - if (rcvo.value.get()) - construct(*rcvo.value); + if (rcvo.value_.get()) + construct(*rcvo.value_); else - value.reset(); + value_.reset(); return *this; } -inline void RCValueObject::construct(const RCDataType &rcdt) { value = rcdt.Clone(); } +inline void RCValueObject::construct(const RCDataType &rcdt) { value_ = rcdt.Clone(); } bool RCValueObject::compare(const RCValueObject &rcvo1, const RCValueObject &rcvo2, common::Operator op, char like_esc) { if (rcvo1.IsNull() || rcvo2.IsNull()) return false; else - return RCDataType::compare(*rcvo1.value, *rcvo2.value, op, like_esc); + return RCDataType::compare(*rcvo1.value_, *rcvo2.value_, op, like_esc); } bool RCValueObject::compare(const RCValueObject &rcvo, common::Operator op, char like_esc) const { @@ -54,93 +55,94 @@ bool RCValueObject::compare(const RCValueObject &rcvo, common::Operator op, char bool RCValueObject::operator==(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value == *rcvo.value; + return *value_ == *rcvo.value_; } bool RCValueObject::operator<(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value < *rcvo.value; + return *value_ < *rcvo.value_; } bool RCValueObject::operator>(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value > *rcvo.value; + return *value_ > *rcvo.value_; } bool RCValueObject::operator>=(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value >= *rcvo.value; + return *value_ >= *rcvo.value_; } bool RCValueObject::operator<=(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value <= *rcvo.value; + return *value_ <= *rcvo.value_; } bool 
RCValueObject::operator!=(const RCValueObject &rcvo) const { if (IsNull() || rcvo.IsNull()) return false; - return *value != *rcvo.value; + return *value_ != *rcvo.value_; } bool RCValueObject::operator==(const RCDataType &rcn) const { if (IsNull() || rcn.IsNull()) return false; - return *value == rcn; + return *value_ == rcn; } bool RCValueObject::operator<(const RCDataType &rcn) const { if (IsNull() || rcn.IsNull()) return false; - return *value < rcn; + return *value_ < rcn; } bool RCValueObject::operator>(const RCDataType &rcn) const { if (IsNull() || rcn.IsNull()) return false; - return *value > rcn; + return *value_ > rcn; } bool RCValueObject::operator>=(const RCDataType &rcn) const { if (IsNull() || rcn.IsNull()) return false; - return *value >= rcn; + return *value_ >= rcn; } bool RCValueObject::operator<=(const RCDataType &rcdt) const { if (IsNull() || rcdt.IsNull()) return false; - return *value <= rcdt; + return *value_ <= rcdt; } bool RCValueObject::operator!=(const RCDataType &rcn) const { if (IsNull() || rcn.IsNull()) return false; - return *value != rcn; + return *value_ != rcn; } -bool RCValueObject::IsNull() const { return value.get() ? value->IsNull() : true; } +bool RCValueObject::IsNull() const { return value_.get() ? value_->IsNull() : true; } -RCDataType &RCValueObject::operator*() const { return value.get() ? *value.get() : RCNum::NullValue(); } +RCDataType &RCValueObject::operator*() const { return value_.get() ? 
*value_.get() : RCNum::NullValue(); } RCValueObject::operator RCNum &() const { if (IsNull()) return RCNum::NullValue(); if (GetValueType() == ValueTypeEnum::NUMERIC_TYPE || GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) - return static_cast<RCNum &>(*value); + return static_cast<RCNum &>(*value_); TIANMU_ERROR("Bad cast in RCValueObject::RCNum&()"); - return static_cast<RCNum &>(*value); + return static_cast<RCNum &>(*value_); } RCValueObject::operator RCDateTime &() const { if (IsNull()) return RCDateTime::NullValue(); - if (GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) return static_cast<RCDateTime &>(*value); + if (GetValueType() == ValueTypeEnum::DATE_TIME_TYPE) + return static_cast<RCDateTime &>(*value_); TIANMU_ERROR("Bad cast in RCValueObject::RCDateTime&()"); - return static_cast<RCDateTime &>(*value); + return static_cast<RCDateTime &>(*value_); } BString RCValueObject::ToBString() const { if (IsNull()) return BString(); - return value->ToBString(); + return value_->ToBString(); } uint RCValueObject::GetHashCode() const { if (IsNull()) return 0; - return value->GetHashCode(); + return value_->GetHashCode(); } } // namespace types diff --git a/storage/tianmu/types/value_parser4txt.cpp b/storage/tianmu/types/value_parser4txt.cpp index 37461482d..cb02b16e5 100644 --- a/storage/tianmu/types/value_parser4txt.cpp +++ b/storage/tianmu/types/value_parser4txt.cpp @@ -29,7 +29,7 @@ const uint PARS_BUF_SIZE = 128; static int String2DateTime(const BString &s, RCDateTime &rcdt, common::CT at) { MYSQL_TIME myt; MYSQL_TIME_STATUS not_used; - if (str_to_datetime(s.GetDataBytesPointer(), s.len, &myt, TIME_DATETIME_ONLY, &not_used)) { + if (str_to_datetime(s.GetDataBytesPointer(), s.len_, &myt, TIME_DATETIME_ONLY, &not_used)) { return 1; } @@ -88,14 +88,14 @@ static inline common::ErrorCode EatInt64(char *&ptr, int &len, int64_t &out_valu common::ErrorCode ValueParserForText::ParseNum(const BString &rcs, RCNum &rcn, short scale) { // TODO: refactor char *val, *val_ptr; - val = val_ptr = rcs.val; - int len = rcs.len; + val = val_ptr = rcs.val_; + int len = 
rcs.len_; EatWhiteSigns(val, len); if (rcs.Equals("NULL", 4)) { - rcn.null = true; + rcn.null_ = true; return common::ErrorCode::SUCCESS; } - rcn.null = false; + rcn.null_ = false; rcn.is_double_ = false; rcn.scale_ = 0; @@ -191,8 +191,8 @@ common::ErrorCode ValueParserForText::Parse(const BString &rcs, RCNum &rcn, comm // TODO: refactor char *val, *val_ptr; - val = val_ptr = rcs.val; - int len = rcs.len; + val = val_ptr = rcs.val_; + int len = rcs.len_; EatWhiteSigns(val, len); int ptr_len = len; val_ptr = val; @@ -249,7 +249,7 @@ common::ErrorCode ValueParserForText::Parse(const BString &rcs, RCNum &rcn, comm if (has_unexpected_sign) { // same as innodb , string convert to 0 - rcn.null = false; + rcn.null_ = false; rcn.attr_type_ = at; rcn.value_ = 0; return common::ErrorCode::VALUE_TRUNCATED; @@ -278,7 +278,7 @@ common::ErrorCode ValueParserForText::Parse(const BString &rcs, RCNum &rcn, comm if (!core::ATI::IsNumericType(at)) return common::ErrorCode::FAILED; } - rcn.null = false; + rcn.null_ = false; rcn.attr_type_ = at; if (core::ATI::IsRealType(at)) { rcn.is_double_ = true; @@ -295,7 +295,7 @@ common::ErrorCode ValueParserForText::Parse(const BString &rcs, RCNum &rcn, comm rcn.is_double_ = false; if (rcs.Equals("NULL", 4)) { - rcn.null = true; + rcn.null_ = true; return common::ErrorCode::SUCCESS; } @@ -354,14 +354,14 @@ common::ErrorCode ValueParserForText::ParseReal(const BString &rcbs, RCNum &rcn, if (!core::ATI::IsRealType(at)) return common::ErrorCode::FAILED; if (rcbs.Equals("NULL", 4) || rcbs.IsNull()) { - rcn.null = true; + rcn.null_ = true; return common::ErrorCode::SUCCESS; } rcn.is_double_ = true; rcn.scale_ = 0; - char *val = rcbs.val; - int len = rcbs.len; + char *val = rcbs.val_; + int len = rcbs.len_; EatWhiteSigns(val, len); char *val_ptr = val; @@ -401,14 +401,15 @@ common::ErrorCode ValueParserForText::ParseReal(const BString &rcbs, RCNum &rcn, ptr_len--; } char stempval[PARS_BUF_SIZE]; - if (rcbs.len >= PARS_BUF_SIZE) return 
common::ErrorCode::VALUE_TRUNCATED; + if (rcbs.len_ >= PARS_BUF_SIZE) + return common::ErrorCode::VALUE_TRUNCATED; #ifndef NDEBUG // resetting stempval to avoid valgrind // false warnings std::memset(stempval, 0, PARS_BUF_SIZE); #endif - std::memcpy(stempval, rcbs.val, rcbs.len); - stempval[rcbs.len] = 0; + std::memcpy(stempval, rcbs.val_, rcbs.len_); + stempval[rcbs.len_] = 0; double d = 0.0; try { d = std::stod(std::string(stempval)); @@ -424,7 +425,7 @@ common::ErrorCode ValueParserForText::ParseReal(const BString &rcbs, RCNum &rcn, } rcn.attr_type_ = at; - rcn.null = false; + rcn.null_ = false; if (at == common::CT::REAL) { if (d > DBL_MAX) { d = DBL_MAX; @@ -460,18 +461,18 @@ common::ErrorCode ValueParserForText::ParseReal(const BString &rcbs, RCNum &rcn, } common::ErrorCode ValueParserForText::ParseBigInt(const BString &rcs, RCNum &rcn) { - char *val_ptr = rcs.val; - int len = rcs.len; + char *val_ptr = rcs.val_; + int len = rcs.len_; int ptr_len = len; common::ErrorCode ret = common::ErrorCode::SUCCESS; - rcn.null = false; + rcn.null_ = false; rcn.attr_type_ = common::CT::BIGINT; rcn.scale_ = 0; rcn.is_double_ = false; if (rcs.Equals("NULL", 4)) { - rcn.null = true; + rcn.null_ = true; return common::ErrorCode::SUCCESS; } int64_t v = 0; @@ -595,12 +596,12 @@ common::ErrorCode ValueParserForText::ParseDecimal(BString const &rcs, int64_t & common::ErrorCode ValueParserForText::ParseDateTimeOrTimestamp(const BString &rcs, RCDateTime &rcv, common::CT at) { if (rcs.IsNull() || rcs.Equals("NULL", 4)) { - rcv.at = at; - rcv.null = true; + rcv.at_ = at; + rcv.null_ = true; return common::ErrorCode::SUCCESS; } - char *buf = rcs.val; - int buflen = rcs.len; + char *buf = rcs.val_; + int buflen = rcs.len_; EatWhiteSigns(buf, buflen); if (buflen == 0) { @@ -787,12 +788,12 @@ common::ErrorCode ValueParserForText::ParseDateTimeOrTimestamp(const BString &rc common::ErrorCode ValueParserForText::ParseTime(const BString &rcs, RCDateTime &rcv) { if (rcs.IsNull() || 
rcs.Equals("NULL", 4)) { - rcv.at = common::CT::TIME; - rcv.null = true; + rcv.at_ = common::CT::TIME; + rcv.null_ = true; return common::ErrorCode::SUCCESS; } - char *buf = rcs.val; - int buflen = rcs.len; + char *buf = rcs.val_; + int buflen = rcs.len_; EatWhiteSigns(buf, buflen); if (buflen == 0) { rcv = RCDateTime(RCDateTime::GetSpecialValue(common::CT::TIME)); @@ -947,12 +948,12 @@ common::ErrorCode ValueParserForText::ParseTime(const BString &rcs, RCDateTime & common::ErrorCode ValueParserForText::ParseDate(const BString &rcs, RCDateTime &rcv) { if (rcs.IsNull() || rcs.Equals("NULL", 4)) { - rcv.at = common::CT::DATE; - rcv.null = true; + rcv.at_ = common::CT::DATE; + rcv.null_ = true; return common::ErrorCode::SUCCESS; } - char *buf = rcs.val; - int buflen = rcs.len; + char *buf = rcs.val_; + int buflen = rcs.len_; EatWhiteSigns(buf, buflen); if (buflen == 0) { rcv = RCDateTime(RCDateTime::GetSpecialValue(common::CT::DATE)); @@ -1007,12 +1008,12 @@ common::ErrorCode ValueParserForText::ParseDate(const BString &rcs, RCDateTime & common::ErrorCode ValueParserForText::ParseYear(const BString &rcs, RCDateTime &rcv) { if (rcs.IsNull() || rcs.Equals("NULL", 4)) { - rcv.at = common::CT::YEAR; - rcv.null = true; + rcv.at_ = common::CT::YEAR; + rcv.null_ = true; return common::ErrorCode::SUCCESS; } - char *buf = rcs.val; - int buflen = rcs.len; + char *buf = rcs.val_; + int buflen = rcs.len_; EatWhiteSigns(buf, buflen); if (buflen == 0) { rcv = RCDateTime(RCDateTime::GetSpecialValue(common::CT::YEAR)); diff --git a/storage/tianmu/vc/const_column.cpp b/storage/tianmu/vc/const_column.cpp index ecd1c7edd..9d5713dbb 100644 --- a/storage/tianmu/vc/const_column.cpp +++ b/storage/tianmu/vc/const_column.cpp @@ -193,7 +193,7 @@ char *ConstColumn::ToString(char p_buf[], size_t buf_ct) const { else if (ct.IsString()) { types::BString val; value.GetBString(val); - std::snprintf(p_buf, buf_ct - 2, "\"%.*s", (int)(val.len < buf_ct - 4 ? 
val.len : buf_ct - 4), + std::snprintf(p_buf, buf_ct - 2, "\"%.*s", (int)(val.len_ < buf_ct - 4 ? val.len_ : buf_ct - 4), val.GetDataBytesPointer()); std::strcat(p_buf, "\""); } diff --git a/storage/tianmu/vc/single_column.cpp b/storage/tianmu/vc/single_column.cpp index e9a5daa71..3d5400118 100644 --- a/storage/tianmu/vc/single_column.cpp +++ b/storage/tianmu/vc/single_column.cpp @@ -67,9 +67,9 @@ double SingleColumn::GetValueDoubleImpl(const core::MIIterator &mit) { } else if (core::ATI::IsStringType(TypeName())) { types::BString vrcbs; col_->GetValueString(mit[dim], vrcbs); - auto vs = std::make_unique<char[]>(vrcbs.len + 1); - std::memcpy(vs.get(), vrcbs.GetDataBytesPointer(), vrcbs.len); - vs[vrcbs.len] = '\0'; + auto vs = std::make_unique<char[]>(vrcbs.len_ + 1); + std::memcpy(vs.get(), vrcbs.GetDataBytesPointer(), vrcbs.len_); + vs[vrcbs.len_] = '\0'; val = std::atof(vs.get()); } else DEBUG_ASSERT(0 && "conversion to double not implemented");