Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Segment for BetaRowset #1577

Merged
merged 5 commits into from
Aug 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions be/src/gutil/endian.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ inline uint64 gbswap_64(uint64 host_int) {
#endif // bswap_64
}

inline unsigned __int128 gbswap_128(unsigned __int128 host_int) {
return static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int >> 64))) |
(static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int))) << 64);
}

// Swap bytes of a 24-bit value.
inline uint32_t bswap_24(uint32_t x) {
return ((x & 0x0000ffULL) << 16) |
((x & 0x00ff00ULL)) |
((x & 0xff0000ULL) >> 16);
}

#ifdef IS_LITTLE_ENDIAN

// Definitions for ntohl etc. that don't require us to include
Expand Down Expand Up @@ -188,4 +200,145 @@ class LittleEndian {
#define gntohll(x) ghtonll(x)
#define ntohll(x) htonll(x)

// Utilities to convert numbers between the current hosts's native byte
// order and big-endian byte order (same as network byte order)
//
// Load/Store methods are alignment safe
class BigEndian {
public:
#ifdef IS_LITTLE_ENDIAN

static uint16 FromHost16(uint16 x) { return bswap_16(x); }
static uint16 ToHost16(uint16 x) { return bswap_16(x); }

static uint32 FromHost24(uint32 x) { return bswap_24(x); }
static uint32 ToHost24(uint32 x) { return bswap_24(x); }

static uint32 FromHost32(uint32 x) { return bswap_32(x); }
static uint32 ToHost32(uint32 x) { return bswap_32(x); }

static uint64 FromHost64(uint64 x) { return gbswap_64(x); }
static uint64 ToHost64(uint64 x) { return gbswap_64(x); }

static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }

static bool IsLittleEndian() { return true; }

#elif defined IS_BIG_ENDIAN

static uint16 FromHost16(uint16 x) { return x; }
static uint16 ToHost16(uint16 x) { return x; }

static uint32 FromHost24(uint32 x) { return x; }
static uint32 ToHost24(uint32 x) { return x; }

static uint32 FromHost32(uint32 x) { return x; }
static uint32 ToHost32(uint32 x) { return x; }

static uint64 FromHost64(uint64 x) { return x; }
static uint64 ToHost64(uint64 x) { return x; }

static uint128 FromHost128(uint128 x) { return x; }
static uint128 ToHost128(uint128 x) { return x; }

static bool IsLittleEndian() { return false; }

#endif /* ENDIAN */
// Functions to do unaligned loads and stores in little-endian order.
static uint16 Load16(const void *p) {
return ToHost16(UNALIGNED_LOAD16(p));
}

static void Store16(void *p, uint16 v) {
UNALIGNED_STORE16(p, FromHost16(v));
}

static uint32 Load32(const void *p) {
return ToHost32(UNALIGNED_LOAD32(p));
}

static void Store32(void *p, uint32 v) {
UNALIGNED_STORE32(p, FromHost32(v));
}

static uint64 Load64(const void *p) {
return ToHost64(UNALIGNED_LOAD64(p));
}

// Build a uint64 from 1-8 bytes.
// 8 * len least significant bits are loaded from the memory with
// BigEndian order. The 64 - 8 * len most significant bits are
// set all to 0.
// In latex-friendly words, this function returns:
// $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned.
//
// This function is equivalent with:
// uint64 val = 0;
// memcpy(&val, p, len);
// return ToHost64(val);
// TODO(user): write a small benchmark and benchmark the speed
// of a memcpy based approach.
//
// For speed reasons this function does not work for len == 0.
// The caller needs to guarantee that 1 <= len <= 8.
static uint64 Load64VariableLength(const void * const p, int len) {
assert(len >= 1 && len <= 8);
uint64 val = Load64(p);
uint64 mask = 0;
--len;
do {
mask = (mask << 8) | 0xff;
// (--len >= 0) is about 10 % faster than (len--) in some benchmarks.
} while (--len >= 0);
return val & mask;
}

static void Store64(void *p, uint64 v) {
UNALIGNED_STORE64(p, FromHost64(v));
}

static uint128 Load128(const void *p) {
return uint128(
ToHost64(UNALIGNED_LOAD64(p)),
ToHost64(UNALIGNED_LOAD64(reinterpret_cast<const uint64 *>(p) + 1)));
}

static void Store128(void *p, const uint128 v) {
UNALIGNED_STORE64(p, FromHost64(Uint128High64(v)));
UNALIGNED_STORE64(reinterpret_cast<uint64 *>(p) + 1,
FromHost64(Uint128Low64(v)));
}

// Build a uint128 from 1-16 bytes.
// 8 * len least significant bits are loaded from the memory with
// BigEndian order. The 128 - 8 * len most significant bits are
// set all to 0.
static uint128 Load128VariableLength(const void *p, int len) {
if (len <= 8) {
return uint128(Load64VariableLength(static_cast<const char *>(p)+8,
len));
} else {
return uint128(
Load64VariableLength(p, len-8),
Load64(static_cast<const char *>(p)+8));
}
}

// Load & Store in machine's word size.
static uword_t LoadUnsignedWord(const void *p) {
if (sizeof(uword_t) == 8)
return Load64(p);
else
return Load32(p);
}

static void StoreUnsignedWord(void *p, uword_t v) {
if (sizeof(uword_t) == 8)
Store64(p, v);
else
Store32(p, v);
}
}; // BigEndian

#endif // UTIL_ENDIAN_ENDIAN_H_
5 changes: 5 additions & 0 deletions be/src/olap/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ add_library(Olap STATIC
hll.cpp
in_list_predicate.cpp
in_stream.cpp
key_coder.cpp
lru_cache.cpp
memtable.cpp
merger.cpp
Expand All @@ -63,6 +64,7 @@ add_library(Olap STATIC
serialize.cpp
storage_engine.cpp
data_dir.cpp
short_key_index.cpp
snapshot_manager.cpp
stream_index_common.cpp
stream_index_reader.cpp
Expand All @@ -84,6 +86,9 @@ add_library(Olap STATIC
rowset/segment_v2/encoding_info.cpp
rowset/segment_v2/ordinal_page_index.cpp
rowset/segment_v2/binary_dict_page.cpp
rowset/segment_v2/segment.cpp
rowset/segment_v2/segment_iterator.cpp
rowset/segment_v2/segment_writer.cpp
rowset_factory.cpp
task/engine_batch_load_task.cpp
task/engine_checksum_task.cpp
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/column_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
#ifndef DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H
#define DORIS_BE_SRC_OLAP_COLUMN_MAPPING_H

#include "olap/wrapper_field.h"

namespace doris {

class WrapperField;

struct ColumnMapping {
ColumnMapping() : ref_column(-1), default_value(nullptr) {}
virtual ~ColumnMapping() {}
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/delete_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "gen_cpp/olap_file.pb.h"
#include "olap/olap_common.h"
#include "olap/utils.h"
#include "olap/olap_cond.h"

using apache::thrift::ThriftDebugString;
using std::numeric_limits;
Expand Down
6 changes: 3 additions & 3 deletions be/src/olap/delete_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

#include "gen_cpp/AgentService_types.h"
#include "gen_cpp/olap_file.pb.h"
#include "olap/field.h"
#include "olap/olap_cond.h"
#include "olap/olap_define.h"
#include "olap/row_cursor.h"
#include "olap/tablet_schema.h"

namespace doris {

typedef google::protobuf::RepeatedPtrField<DeletePredicatePB> DelPredicateArray;
class Conditions;
class RowCursor;

class DeleteConditionHandler {
public:
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/delta_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "olap/delta_writer.h"

#include "olap/schema.h"
#include "olap/memtable.h"
#include "olap/data_dir.h"
#include "olap/rowset/alpha_rowset_writer.h"
#include "olap/rowset/rowset_meta_manager.h"
Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/delta_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#ifndef DORIS_BE_SRC_DELTA_WRITER_H
#define DORIS_BE_SRC_DELTA_WRITER_H

#include "olap/memtable.h"
#include "olap/storage_engine.h"
#include "olap/tablet.h"
#include "olap/schema_change.h"
Expand All @@ -30,6 +29,8 @@
namespace doris {

class SegmentGroup;
class MemTable;
class Schema;

enum WriteType {
LOAD = 1,
Expand Down
14 changes: 14 additions & 0 deletions be/src/olap/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "olap/olap_define.h"
#include "olap/tablet_schema.h"
#include "olap/types.h"
#include "olap/key_coder.h"
#include "olap/utils.h"
#include "olap/row_cursor_cell.h"
#include "runtime/mem_pool.h"
Expand Down Expand Up @@ -57,26 +58,30 @@ class Field {
Field(const TabletColumn& column)
: _type_info(get_type_info(column.type())),
_agg_info(get_aggregate_info(column.aggregation(), column.type())),
_key_coder(get_key_coder(column.type())),
_index_size(column.index_length()),
_is_nullable(column.is_nullable()) { }

Field(FieldType type)
: _type_info(get_type_info(type)),
_agg_info(get_aggregate_info(OLAP_FIELD_AGGREGATION_NONE, type)),
_key_coder(get_key_coder(type)),
_index_size(_type_info->size()),
_is_nullable(true) {
}

Field(const FieldAggregationMethod& agg, const FieldType& type, bool is_nullable)
: _type_info(get_type_info(type)),
_agg_info(get_aggregate_info(agg, type)),
_key_coder(get_key_coder(type)),
_index_size(-1),
_is_nullable(is_nullable) {
}

Field(const FieldAggregationMethod& agg, const FieldType& type, size_t index_size, bool is_nullable)
: _type_info(get_type_info(type)),
_agg_info(get_aggregate_info(agg, type)),
_key_coder(get_key_coder(type)),
_index_size(index_size),
_is_nullable(is_nullable) {
}
Expand Down Expand Up @@ -233,10 +238,19 @@ class Field {
FieldType type() const { return _type_info->type(); }
const TypeInfo* type_info() const { return _type_info; }
bool is_nullable() const { return _is_nullable; }

void encode_ascending(const void* value, std::string* buf) const {
_key_coder->encode_ascending(value, _index_size, buf);
}

Status decode_ascending(Slice* encoded_key, uint8_t* cell_ptr, Arena* arena) const {
return _key_coder->decode_ascending(encoded_key, _index_size, cell_ptr, arena);
}
private:
// Field的最大长度,单位为字节,通常等于length, 变长字符串不同
const TypeInfo* _type_info;
const AggregateInfo* _agg_info;
const KeyCoder* _key_coder;
uint16_t _index_size;
bool _is_nullable;
};
Expand Down
19 changes: 19 additions & 0 deletions be/src/olap/iterators.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,33 @@

#pragma once

#include <memory>

#include "common/status.h"

namespace doris {

class RowCursor;
class RowBlockV2;
class Schema;

struct StorageReadOptions {
// lower_bound defines the smallest key at which iterator will
// return data.
// If lower_bound is null, won't return
std::shared_ptr<RowCursor> lower_bound;
imay marked this conversation as resolved.
Show resolved Hide resolved

// If include_lower_bound is true, data equal with lower_bound will
// be read
bool include_lower_bound;

// upper_bound defines the extend upto which the iterator can return
// data.
std::shared_ptr<RowCursor> upper_bound;

// If include_upper_bound is true, data equal with upper_bound will
// be read
bool include_upper_bound;
};

// Used to read data in RowBlockV2 one by one
Expand Down
Loading