apache · imay · Aug 6, 2019 · Aug 1, 2019 · Aug 2, 2019 · Aug 3, 2019
diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h
@@ -32,6 +32,18 @@ inline uint64 gbswap_64(uint64 host_int) {
 #endif  // bswap_64
 }
 
+inline unsigned __int128 gbswap_128(unsigned __int128 host_int) {
+    return static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int >> 64))) |
+        (static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int))) << 64);
+}
+
+// Swap bytes of a 24-bit value.
+inline uint32_t bswap_24(uint32_t x) {
+    return  ((x & 0x0000ffULL) << 16) |
+        ((x & 0x00ff00ULL)) |
+        ((x & 0xff0000ULL) >> 16);
+}
+
 #ifdef IS_LITTLE_ENDIAN
 
 // Definitions for ntohl etc. that don't require us to include
@@ -188,4 +200,145 @@ class LittleEndian {
 #define gntohll(x) ghtonll(x)
 #define ntohll(x) htonll(x)
 
+// Utilities to convert numbers between the current hosts's native byte
+// order and big-endian byte order (same as network byte order)
+//
+// Load/Store methods are alignment safe
+class BigEndian {
+public:
+#ifdef IS_LITTLE_ENDIAN
+
+    static uint16 FromHost16(uint16 x) { return bswap_16(x); }
+    static uint16 ToHost16(uint16 x) { return bswap_16(x); }
+
+    static uint32 FromHost24(uint32 x) { return bswap_24(x); }
+    static uint32 ToHost24(uint32 x) { return bswap_24(x); }
+
+    static uint32 FromHost32(uint32 x) { return bswap_32(x); }
+    static uint32 ToHost32(uint32 x) { return bswap_32(x); }
+
+    static uint64 FromHost64(uint64 x) { return gbswap_64(x); }
+    static uint64 ToHost64(uint64 x) { return gbswap_64(x); }
+
+    static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
+    static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }
+
+    static bool IsLittleEndian() { return true; }
+
+#elif defined IS_BIG_ENDIAN
+
+    static uint16 FromHost16(uint16 x) { return x; }
+    static uint16 ToHost16(uint16 x) { return x; }
+
+    static uint32 FromHost24(uint32 x) { return x; }
+    static uint32 ToHost24(uint32 x) { return x; }
+
+    static uint32 FromHost32(uint32 x) { return x; }
+    static uint32 ToHost32(uint32 x) { return x; }
+
+    static uint64 FromHost64(uint64 x) { return x; }
+    static uint64 ToHost64(uint64 x) { return x; }
+
+    static uint128 FromHost128(uint128 x) { return x; }
+    static uint128 ToHost128(uint128 x) { return x; }
+
+    static bool IsLittleEndian() { return false; }
+
+#endif /* ENDIAN */
+    // Functions to do unaligned loads and stores in little-endian order.
+    static uint16 Load16(const void *p) {
+        return ToHost16(UNALIGNED_LOAD16(p));
+    }
+
+    static void Store16(void *p, uint16 v) {
+        UNALIGNED_STORE16(p, FromHost16(v));
+    }
+
+    static uint32 Load32(const void *p) {
+        return ToHost32(UNALIGNED_LOAD32(p));
+    }
+
+    static void Store32(void *p, uint32 v) {
+        UNALIGNED_STORE32(p, FromHost32(v));
+    }
+
+    static uint64 Load64(const void *p) {
+        return ToHost64(UNALIGNED_LOAD64(p));
+    }
+
+    // Build a uint64 from 1-8 bytes.
+    // 8 * len least significant bits are loaded from the memory with
+    // BigEndian order. The 64 - 8 * len most significant bits are
+    // set all to 0.
+    // In latex-friendly words, this function returns:
+    //     $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned.
+    //
+    // This function is equivalent with:
+    // uint64 val = 0;
+    // memcpy(&val, p, len);
+    // return ToHost64(val);
+    // TODO(user): write a small benchmark and benchmark the speed
+    // of a memcpy based approach.
+    //
+    // For speed reasons this function does not work for len == 0.
+    // The caller needs to guarantee that 1 <= len <= 8.
+    static uint64 Load64VariableLength(const void * const p, int len) {
+        assert(len >= 1 && len <= 8);
+        uint64 val = Load64(p);
+        uint64 mask = 0;
+        --len;
+        do {
+            mask = (mask << 8) | 0xff;
+            // (--len >= 0) is about 10 % faster than (len--) in some benchmarks.
+        } while (--len >= 0);
+        return val & mask;
+    }
+
+    static void Store64(void *p, uint64 v) {
+        UNALIGNED_STORE64(p, FromHost64(v));
+    }
+
+    static uint128 Load128(const void *p) {
+        return uint128(
+            ToHost64(UNALIGNED_LOAD64(p)),
+            ToHost64(UNALIGNED_LOAD64(reinterpret_cast<const uint64 *>(p) + 1)));
+    }
+
+    static void Store128(void *p, const uint128 v) {
+        UNALIGNED_STORE64(p, FromHost64(Uint128High64(v)));
+        UNALIGNED_STORE64(reinterpret_cast<uint64 *>(p) + 1,
+                          FromHost64(Uint128Low64(v)));
+    }
+
+    // Build a uint128 from 1-16 bytes.
+    // 8 * len least significant bits are loaded from the memory with
+    // BigEndian order. The 128 - 8 * len most significant bits are
+    // set all to 0.
+    static uint128 Load128VariableLength(const void *p, int len) {
+        if (len <= 8) {
+            return uint128(Load64VariableLength(static_cast<const char *>(p)+8,
+                                                len));
+        } else {
+            return uint128(
+                Load64VariableLength(p, len-8),
+                Load64(static_cast<const char *>(p)+8));
+        }
+    }
+
+    // Load & Store in machine's word size.
+    static uword_t LoadUnsignedWord(const void *p) {
+        if (sizeof(uword_t) == 8)
+            return Load64(p);
+        else
+            return Load32(p);
+    }
+
+    static void StoreUnsignedWord(void *p, uword_t v) {
+        if (sizeof(uword_t) == 8)
+            Store64(p, v);
+        else
+            Store32(p, v);
+    }
+};  // BigEndian
+
 #endif  // UTIL_ENDIAN_ENDIAN_H_
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
@@ -41,6 +41,7 @@ add_library(Olap STATIC
     hll.cpp
     in_list_predicate.cpp
     in_stream.cpp
+    key_coder.cpp
     lru_cache.cpp
     memtable.cpp
     merger.cpp
@@ -63,6 +64,7 @@ add_library(Olap STATIC
     serialize.cpp
     storage_engine.cpp
     data_dir.cpp
+    short_key_index.cpp
     snapshot_manager.cpp
     stream_index_common.cpp
     stream_index_reader.cpp
@@ -84,6 +86,9 @@ add_library(Olap STATIC
     rowset/segment_v2/encoding_info.cpp
     rowset/segment_v2/ordinal_page_index.cpp
     rowset/segment_v2/binary_dict_page.cpp
+    rowset/segment_v2/segment.cpp
+    rowset/segment_v2/segment_iterator.cpp
+    rowset/segment_v2/segment_writer.cpp
     rowset_factory.cpp
     task/engine_batch_load_task.cpp
     task/engine_checksum_task.cpp

diff --git a/be/src/olap/column_mapping.h b/be/src/olap/column_mapping.h
@@ -18,10 +18,10 @@
 #ifndef DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
 #define DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
 
-#include "olap/wrapper_field.h"
-
 namespace doris {
 
+class WrapperField;
+
 struct ColumnMapping {
     ColumnMapping() : ref_column(-1), default_value(nullptr) {}
     virtual ~ColumnMapping() {}

diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
@@ -29,6 +29,7 @@
 #include "gen_cpp/olap_file.pb.h"
 #include "olap/olap_common.h"
 #include "olap/utils.h"
+#include "olap/olap_cond.h"
 
 using apache::thrift::ThriftDebugString;
 using std::numeric_limits;

diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
@@ -23,14 +23,14 @@
 
 #include "gen_cpp/AgentService_types.h"
 #include "gen_cpp/olap_file.pb.h"
-#include "olap/field.h"
-#include "olap/olap_cond.h"
 #include "olap/olap_define.h"
-#include "olap/row_cursor.h"
+#include "olap/tablet_schema.h"
 
 namespace doris {
 
 typedef google::protobuf::RepeatedPtrField<DeletePredicatePB> DelPredicateArray;
+class Conditions;
+class RowCursor;
 
 class DeleteConditionHandler {
 public:

diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
@@ -18,6 +18,7 @@
 #include "olap/delta_writer.h"
 
 #include "olap/schema.h"
+#include "olap/memtable.h"
 #include "olap/data_dir.h"
 #include "olap/rowset/alpha_rowset_writer.h"
 #include "olap/rowset/rowset_meta_manager.h"

diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
@@ -18,7 +18,6 @@
 #ifndef DORIS_BE_SRC_DELTA_WRITER_H
 #define DORIS_BE_SRC_DELTA_WRITER_H
 
-#include "olap/memtable.h"
 #include "olap/storage_engine.h"
 #include "olap/tablet.h"
 #include "olap/schema_change.h"
@@ -30,6 +29,8 @@
 namespace doris {
 
 class SegmentGroup;
+class MemTable;
+class Schema;
 
 enum WriteType {
     LOAD = 1,

diff --git a/be/src/olap/field.h b/be/src/olap/field.h
@@ -26,6 +26,7 @@
 #include "olap/olap_define.h"
 #include "olap/tablet_schema.h"
 #include "olap/types.h"
+#include "olap/key_coder.h"
 #include "olap/utils.h"
 #include "olap/row_cursor_cell.h"
 #include "runtime/mem_pool.h"
@@ -57,26 +58,30 @@ class Field {
     Field(const TabletColumn& column)
         : _type_info(get_type_info(column.type())),
         _agg_info(get_aggregate_info(column.aggregation(), column.type())),
+        _key_coder(get_key_coder(column.type())),
         _index_size(column.index_length()),
         _is_nullable(column.is_nullable()) { }
 
     Field(FieldType type)
         : _type_info(get_type_info(type)),
         _agg_info(get_aggregate_info(OLAP_FIELD_AGGREGATION_NONE, type)),
+        _key_coder(get_key_coder(type)),
         _index_size(_type_info->size()),
         _is_nullable(true) {
     }
 
     Field(const FieldAggregationMethod& agg, const FieldType& type, bool is_nullable)
         : _type_info(get_type_info(type)),
         _agg_info(get_aggregate_info(agg, type)),
+        _key_coder(get_key_coder(type)),
         _index_size(-1),
         _is_nullable(is_nullable) {
     }
 
     Field(const FieldAggregationMethod& agg, const FieldType& type, size_t index_size, bool is_nullable)
         : _type_info(get_type_info(type)),
         _agg_info(get_aggregate_info(agg, type)),
+        _key_coder(get_key_coder(type)),
         _index_size(index_size),
         _is_nullable(is_nullable) {
     }
@@ -233,10 +238,19 @@ class Field {
     FieldType type() const { return _type_info->type(); }
     const TypeInfo* type_info() const { return _type_info; }
     bool is_nullable() const { return _is_nullable; }
+
+    void encode_ascending(const void* value, std::string* buf) const {
+        _key_coder->encode_ascending(value, _index_size, buf);
+    }
+
+    Status decode_ascending(Slice* encoded_key, uint8_t* cell_ptr, Arena* arena) const {
+        return _key_coder->decode_ascending(encoded_key, _index_size, cell_ptr, arena);
+    }
 private:
     // Field的最大长度，单位为字节，通常等于length， 变长字符串不同
     const TypeInfo* _type_info;
     const AggregateInfo* _agg_info;
+    const KeyCoder* _key_coder;
     uint16_t _index_size;
     bool _is_nullable;
 };

diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
@@ -17,14 +17,33 @@
 
 #pragma once
 
+#include <memory>
+
 #include "common/status.h"
 
 namespace doris {
 
+class RowCursor;
 class RowBlockV2;
 class Schema;
 
 struct StorageReadOptions {
+    // lower_bound defines the smallest key at which iterator will
+    // return data.
+    // If lower_bound is null, won't return
+    std::shared_ptr<RowCursor> lower_bound;
+
+    // If include_lower_bound is true, data equal with lower_bound will
+    // be read
+    bool include_lower_bound;
+
+    // upper_bound defines the extend upto which the iterator can return
+    // data.
+    std::shared_ptr<RowCursor> upper_bound;
+
+    // If include_upper_bound is true, data equal with upper_bound will
+    // be read
+    bool include_upper_bound;
 };
 
 // Used to read data in RowBlockV2 one by one