From 99396117c1e4b4642ce16b7a2244d20b5e12e558 Mon Sep 17 00:00:00 2001 From: LiMK Date: Tue, 20 Jun 2023 11:34:42 +0800 Subject: [PATCH 01/10] ci(codecov): update the config --- .github/codecov.yml | 3 +++ .github/workflows/pull_requests.yml | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 .github/codecov.yml diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 000000000..f3547a439 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,3 @@ +ignore: +- ^stonedb/include/boost_1_66_0.* +- ^stonedb/extra.* \ No newline at end of file diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml index 77461ec5d..565be0624 100644 --- a/.github/workflows/pull_requests.yml +++ b/.github/workflows/pull_requests.yml @@ -137,4 +137,5 @@ jobs: lcov --capture --directory . --output-file coverage.info --test-name coverage - name: Codecov - uses: codecov/codecov-action@v3.1.1 + uses: codecov/codecov-action@v3.1.4 + From d7584f1374bfe34d80d645ed42316b5da097a2cb Mon Sep 17 00:00:00 2001 From: RingsC Date: Wed, 14 Jun 2023 19:53:14 +0800 Subject: [PATCH 02/10] fix(tianmu): To support ignore option for update statement To support `update ignore` statement. The logic of uniqueness check is re-implemented. 
--- mysql-test/suite/tianmu/r/issue1616.result | 61 ++++++++++++++++++++ mysql-test/suite/tianmu/t/issue1616.test | 64 +++++++++++++++++++++ storage/tianmu/core/engine.cpp | 8 +-- storage/tianmu/core/engine.h | 5 ++ storage/tianmu/core/table_share.cpp | 1 + storage/tianmu/core/table_share.h | 3 + storage/tianmu/core/tianmu_table.cpp | 4 +- storage/tianmu/handler/ha_tianmu.cpp | 11 +++- storage/tianmu/handler/ha_tianmu.h | 5 ++ storage/tianmu/index/tianmu_table_index.cpp | 56 +++++++++++------- storage/tianmu/index/tianmu_table_index.h | 4 ++ storage/tianmu/vc/tianmu_attr.cpp | 57 +++++++++++++----- storage/tianmu/vc/tianmu_attr.h | 4 +- 13 files changed, 239 insertions(+), 44 deletions(-) create mode 100644 mysql-test/suite/tianmu/r/issue1616.result create mode 100644 mysql-test/suite/tianmu/t/issue1616.test diff --git a/mysql-test/suite/tianmu/r/issue1616.result b/mysql-test/suite/tianmu/r/issue1616.result new file mode 100644 index 000000000..a6d09cb49 --- /dev/null +++ b/mysql-test/suite/tianmu/r/issue1616.result @@ -0,0 +1,61 @@ +DROP DATABASE IF EXISTS issue1616_test; +CREATE DATABASE issue1616_test; +USE issue1616_test; +CREATE TABLE T1 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) +DEFAULT '0' NOT NULL, PRIMARY KEY (id)) engine=tianmu; +INSERT INTO T1 VALUES (3,1,1),(4,1,1); +INSERT INTO T1 VALUES (3,1,1),(4,1,1); +ERROR 23000: Duplicate entry '3' for key 'PRIMARY' +SELECT * FROM T1; +id parent_id level +3 1 1 +4 1 1 +UPDATE IGNORE T1 SET id=id+1; +SELECT * FROM T1; +id parent_id level +3 1 1 +5 1 1 +UPDATE T1 SET id =10; +ERROR 23000: Duplicate entry '10' for key 'PRIMARY' +SELECT * FROM T1; +id parent_id level +3 1 1 +5 1 1 +UPDATE T1 SET ID=5 WHERE ID=3; +ERROR 23000: Duplicate entry '5' for key 'PRIMARY' +SELECT * FROM T1; +id parent_id level +3 1 1 +5 1 1 +DROP TABLE T1; +CREATE TABLE T2 (dt datetime, val int, primary key(dt)) ENGINE =tianmu; +INSERT INTO T2 VALUES ('2017-11-05 20:29:36',1), ('2027-11-05 
20:29:36', 2); +UPDATE T2 SET dt ='2027-11-05 20:29:36' WHERE val =1; +ERROR 23000: Duplicate entry '2027-11-05 20:29:36' for key 'PRIMARY' +SELECT * FROM T2; +dt val +2017-11-05 20:29:36 1 +2027-11-05 20:29:36 2 +DROP TABLE T2; +CREATE TABLE T3 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) +DEFAULT '0' NOT NULL, PRIMARY KEY (id, parent_id)) engine=tianmu; +INSERT INTO T3 VALUES (3,1,1),(4,1,1); +INSERT INTO T3 VALUES (3,1,1),(4,1,1); +ERROR 23000: Duplicate entry '3-1' for key 'PRIMARY' +UPDATE IGNORE T3 SET id=id+1; +SELECT * FROM T3; +id parent_id level +4 1 1 +5 1 1 +DROP TABLE T3; +CREATE TABLE T4 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) +DEFAULT '0' NOT NULL, PRIMARY KEY (id)) engine=innodb; +INSERT INTO T4 VALUES (3,1,1),(4,1,1); +UPDATE T4 SET id =10; +ERROR 23000: Duplicate entry '10' for key 'PRIMARY' +SELECT * FROM T4; +id parent_id level +3 1 1 +4 1 1 +DROP TABLE T4; +DROP DATABASE issue1616_test; diff --git a/mysql-test/suite/tianmu/t/issue1616.test b/mysql-test/suite/tianmu/t/issue1616.test new file mode 100644 index 000000000..7805fb960 --- /dev/null +++ b/mysql-test/suite/tianmu/t/issue1616.test @@ -0,0 +1,64 @@ +--source include/have_tianmu.inc +--disable_warnings +DROP DATABASE IF EXISTS issue1616_test; +CREATE DATABASE issue1616_test; +USE issue1616_test; +--enable_warnings + +CREATE TABLE T1 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) + DEFAULT '0' NOT NULL, PRIMARY KEY (id)) engine=tianmu; + +INSERT INTO T1 VALUES (3,1,1),(4,1,1); +--ERROR 1062 +INSERT INTO T1 VALUES (3,1,1),(4,1,1); + +SELECT * FROM T1; + +UPDATE IGNORE T1 SET id=id+1; + +SELECT * FROM T1; + +--ERROR 1062 +UPDATE T1 SET id =10; + +SELECT * FROM T1; + +--ERROR 1062 +UPDATE T1 SET ID=5 WHERE ID=3; +SELECT * FROM T1; + +DROP TABLE T1; + + +CREATE TABLE T2 (dt datetime, val int, primary key(dt)) ENGINE =tianmu; +INSERT INTO T2 
VALUES ('2017-11-05 20:29:36',1), ('2027-11-05 20:29:36', 2); +--ERROR 1062 +UPDATE T2 SET dt ='2027-11-05 20:29:36' WHERE val =1; + +SELECT * FROM T2; +DROP TABLE T2; + +#multi-keys +CREATE TABLE T3 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) + DEFAULT '0' NOT NULL, PRIMARY KEY (id, parent_id)) engine=tianmu; + +INSERT INTO T3 VALUES (3,1,1),(4,1,1); +--ERROR 1062 +INSERT INTO T3 VALUES (3,1,1),(4,1,1); + +UPDATE IGNORE T3 SET id=id+1; +SELECT * FROM T3; + +DROP TABLE T3; + +CREATE TABLE T4 (id int(11) NOT NULL auto_increment, parent_id int(11) DEFAULT '0' NOT NULL, level tinyint(4) + DEFAULT '0' NOT NULL, PRIMARY KEY (id)) engine=innodb; + +INSERT INTO T4 VALUES (3,1,1),(4,1,1); +--ERROR 1062 +UPDATE T4 SET id =10; + +SELECT * FROM T4; +DROP TABLE T4; + +DROP DATABASE issue1616_test; diff --git a/storage/tianmu/core/engine.cpp b/storage/tianmu/core/engine.cpp index aefa3472b..fb402307f 100644 --- a/storage/tianmu/core/engine.cpp +++ b/storage/tianmu/core/engine.cpp @@ -1826,7 +1826,7 @@ int Engine::InsertRow(const std::string &table_path, [[maybe_unused]] Transactio int Engine::UpdateRow(const std::string &table_path, TABLE *table, std::shared_ptr &share, uint64_t row_id, const uchar *old_data, uchar *new_data) { - DBUG_ENTER("Engine::UpdateRow"); + // DBUG_ENTER("Engine::UpdateRow"); int ret = 0; if (tianmu_sysvar_insert_delayed && table->s->tmp_table == NO_TMP_TABLE && tianmu_sysvar_enable_rowstore) { UpdateToDelta(table_path, share, table, row_id, old_data, new_data); @@ -1835,13 +1835,13 @@ int Engine::UpdateRow(const std::string &table_path, TABLE *table, std::shared_p auto tm_table = current_txn_->GetTableByPath(table_path); ret = tm_table->Update(table, row_id, old_data, new_data); } - - DBUG_RETURN(ret); + return ret; + // DBUG_RETURN(ret); } int Engine::DeleteRow(const std::string &table_path, TABLE *table, std::shared_ptr &share, uint64_t row_id) { - DBUG_ENTER("Engine::UpdateRow"); + 
DBUG_ENTER("Engine::DeleteRow"); int ret = 0; if (tianmu_sysvar_insert_delayed && table->s->tmp_table == NO_TMP_TABLE && tianmu_sysvar_enable_rowstore) { DeleteToDelta(share, table, row_id); diff --git a/storage/tianmu/core/engine.h b/storage/tianmu/core/engine.h index d7e30b007..af38c6841 100644 --- a/storage/tianmu/core/engine.h +++ b/storage/tianmu/core/engine.h @@ -179,6 +179,9 @@ class Engine final { static const char *StrToFiled(const char *ptr, Field *field, DeltaRecordHead *deltaRecord, int col_num); static char *FiledToStr(char *ptr, Field *field, DeltaRecordHead *deltaRecord, int col_num, THD *thd); + void setExtra(ha_extra_function extra) { extra_info = extra; } + ha_extra_function getExtra() { return extra_info; } + private: void AddTx(Transaction *tx); void RemoveTx(Transaction *tx); @@ -288,6 +291,8 @@ class Engine final { uint64_t m_mem_available_ = 0; uint64_t m_swap_used_ = 0; index::KVStore *store_; + + ha_extra_function extra_info; }; class ResultSender { diff --git a/storage/tianmu/core/table_share.cpp b/storage/tianmu/core/table_share.cpp index a96cfcaf6..eb09cf1c1 100644 --- a/storage/tianmu/core/table_share.cpp +++ b/storage/tianmu/core/table_share.cpp @@ -26,6 +26,7 @@ namespace Tianmu { namespace core { TableShare::TableShare(const fs::path &table_path, const TABLE_SHARE *table_share) : no_cols(table_share->fields), table_path(table_path) { + s = const_cast(table_share); try { system::TianmuFile ftbl; ftbl.OpenReadOnly(table_path / common::TABLE_DESC_FILE); diff --git a/storage/tianmu/core/table_share.h b/storage/tianmu/core/table_share.h index bfdd07bc9..5e62dbc35 100644 --- a/storage/tianmu/core/table_share.h +++ b/storage/tianmu/core/table_share.h @@ -67,6 +67,9 @@ class TableShare final { // MySQL lock THR_LOCK thr_lock; + // TABLE share in sql. 
+ TABLE_SHARE *s; + private: TABLE_META meta; size_t no_cols; diff --git a/storage/tianmu/core/tianmu_table.cpp b/storage/tianmu/core/tianmu_table.cpp index 72c8c4e2f..e94a4c0df 100644 --- a/storage/tianmu/core/tianmu_table.cpp +++ b/storage/tianmu/core/tianmu_table.cpp @@ -1016,7 +1016,6 @@ int TianmuTable::Insert(TABLE *table) { } int TianmuTable::Update(TABLE *table, uint64_t row_id, const uchar *old_data, uchar *new_data) { - // todo(dfx): move to before for loop, need test my_bitmap_map *org_bitmap2 = dbug_tmp_use_all_columns(table, table->read_set); std::shared_ptr defer(nullptr, [org_bitmap2, table](...) { dbug_tmp_restore_column_map(table->read_set, org_bitmap2); }); @@ -1025,6 +1024,9 @@ int TianmuTable::Update(TABLE *table, uint64_t row_id, const uchar *old_data, uc core::Engine *eng = reinterpret_cast(tianmu_hton->data); assert(eng); + // uinsg check_unique_constraint(table) to check whether it has unique constr on this table; + // now, tianmu only support PK, the unique constraint is not supported now. + // the vfield is not in our consideration. and should not has any triggers on it. for (uint col_id = 0; col_id < table->s->fields; col_id++) { if (!bitmap_is_set(table->write_set, col_id)) { continue; diff --git a/storage/tianmu/handler/ha_tianmu.cpp b/storage/tianmu/handler/ha_tianmu.cpp index a7fbc4a6d..8e2c78a83 100644 --- a/storage/tianmu/handler/ha_tianmu.cpp +++ b/storage/tianmu/handler/ha_tianmu.cpp @@ -419,7 +419,7 @@ int ha_tianmu::write_row([[maybe_unused]] uchar *buf) { Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc. 
*/ int ha_tianmu::update_row(const uchar *old_data, uchar *new_data) { - DBUG_ENTER(__PRETTY_FUNCTION__); + DBUG_ENTER("ha_tianmu::update_row"); core::Engine *eng = reinterpret_cast(tianmu_hton->data); assert(eng); @@ -1176,6 +1176,15 @@ int ha_tianmu::extra(enum ha_extra_function operation) { cq_.reset(); query_.reset(); } + + extra_info = operation; + if (operation == HA_EXTRA_IGNORE_DUP_KEY || operation == HA_EXTRA_NO_IGNORE_DUP_KEY) { + core::Engine *eng = reinterpret_cast(tianmu_hton->data); + ASSERT(eng); + if (eng) + eng->setExtra(extra_info); + } + DBUG_RETURN(0); } diff --git a/storage/tianmu/handler/ha_tianmu.h b/storage/tianmu/handler/ha_tianmu.h index ccb0cfde6..4114cff32 100644 --- a/storage/tianmu/handler/ha_tianmu.h +++ b/storage/tianmu/handler/ha_tianmu.h @@ -155,6 +155,9 @@ class ha_tianmu final : public handler { int fill_row_by_id(uchar *buf, uint64_t rowid); void key_convert(const uchar *key, uint key_len, std::vector cols, std::vector &keys); + void setExtra(ha_extra_function extra) { extra_info = extra; } + ha_extra_function getExtra() { return extra_info; } + public: static const Alter_inplace_info::HA_ALTER_FLAGS TIANMU_SUPPORTED_ALTER_ADD_DROP_ORDER; static const Alter_inplace_info::HA_ALTER_FLAGS TIANMU_SUPPORTED_ALTER_COLUMN_NAME; @@ -182,6 +185,8 @@ class ha_tianmu final : public handler { std::unique_ptr cq_; bool result_ = false; std::vector> blob_buffers_; + + ha_extra_function extra_info; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ }; } // namespace DBHandler diff --git a/storage/tianmu/index/tianmu_table_index.cpp b/storage/tianmu/index/tianmu_table_index.cpp index 38dfcfdf7..4e8db0112 100644 --- a/storage/tianmu/index/tianmu_table_index.cpp +++ b/storage/tianmu/index/tianmu_table_index.cpp @@ -27,6 +27,8 @@ namespace Tianmu { namespace index { +// Here, In future, tianmu maybe has uniquekey, secondary key, fulltext, etc. Therefore, a type of index should be +// added. But, now. we only have only one index: PK. 
TianmuTableIndex::TianmuTableIndex(const std::string &name, TABLE *table) { std::string fullname; // normalize the table name. @@ -168,16 +170,12 @@ common::ErrorCode TianmuTableIndex::CheckUniqueness(core::Transaction *tx, const s = tx->KVTrans().Get(rocksdb_key_->get_cf(), pk_slice, &retrieved_value); } - if (!s.ok() && !s.IsNotFound()) { - TIANMU_LOG(LogCtl_Level::ERROR, "RockDb read fail:%s", s.getState()); - return common::ErrorCode::FAILED; - } - - if (s.ok()) { + if (s.ok()) { // means that there's another key existed. return common::ErrorCode::DUPP_KEY; - } - - return common::ErrorCode::SUCCESS; + } else if (s.IsNotFound()) { // not exist a key. + return common::ErrorCode::NOT_FOUND_KEY; + } else // failed. + return common::ErrorCode::FAILED; } common::ErrorCode TianmuTableIndex::InsertIndex(core::Transaction *tx, std::vector &fields, uint64_t row) { @@ -185,9 +183,13 @@ common::ErrorCode TianmuTableIndex::InsertIndex(core::Transaction *tx, std::vect rocksdb_key_->pack_key(key, fields, value); + // 1)inserts into a PK; 2) insert into without a PK. 
the return value of `CheckUniqueness` is as following: + // 1: common::ErrorCode::DUPP_KEY; 2: common::ErrorCode::NOT_FOUND_KEY; 3:common::ErrorCode::FAILED common::ErrorCode err_code = CheckUniqueness(tx, {(const char *)key.ptr(), key.length()}); - if (err_code != common::ErrorCode::SUCCESS) + if (idx_type_ == IndexType::INDEX_TYPE_PRIMARY && err_code == common::ErrorCode::DUPP_KEY) { // PK return err_code; + } else if (idx_type_ == IndexType::INDEX_TYPE_SECONDARY) { // do not need uniqueness constrain + } value.write_uint64(row); const auto cf = rocksdb_key_->get_cf(); @@ -196,27 +198,37 @@ common::ErrorCode TianmuTableIndex::InsertIndex(core::Transaction *tx, std::vect if (!s.ok()) { TIANMU_LOG(LogCtl_Level::ERROR, "RocksDB: insert key fail!"); err_code = common::ErrorCode::FAILED; - } + } else if (s.ok()) + err_code = common::ErrorCode::SUCCESS; + return err_code; } common::ErrorCode TianmuTableIndex::UpdateIndex(core::Transaction *tx, std::string &nkey, std::string &okey, uint64_t row) { - StringWriter value, packkey; - std::vector ofields, nfields; + StringWriter new_value, old_value, new_packke, old_packkey; + std::vector old_fields, new_fields; + + old_fields.emplace_back(okey); + new_fields.emplace_back(nkey); + + rocksdb_key_->pack_key(new_packke, new_fields, new_value); + rocksdb_key_->pack_key(old_packkey, old_fields, old_value); - ofields.emplace_back(okey); - nfields.emplace_back(nkey); + common::ErrorCode err_code = CheckUniqueness(tx, {(const char *)new_packke.ptr(), new_packke.length()}); - rocksdb_key_->pack_key(packkey, ofields, value); - common::ErrorCode err_code = CheckUniqueness(tx, {(const char *)packkey.ptr(), packkey.length()}); if (err_code == common::ErrorCode::DUPP_KEY) { - const auto cf = rocksdb_key_->get_cf(); - tx->KVTrans().Delete(cf, {(const char *)packkey.ptr(), packkey.length()}); - } else { - TIANMU_LOG(LogCtl_Level::WARN, "RockDb: don't have the key for update!"); + } else if (err_code == common::ErrorCode::NOT_FOUND_KEY || 
err_code == common::ErrorCode::SUCCESS) { + // gets the old index. then update it. + common::ErrorCode code = CheckUniqueness(tx, {(const char *)old_packkey.ptr(), old_packkey.length()}); + if (code != common::ErrorCode::SUCCESS) { + const auto cf = rocksdb_key_->get_cf(); + // delete old index then insert a new one. + rocksdb::Status ret_del = tx->KVTrans().Delete(cf, {(const char *)old_packkey.ptr(), old_packkey.length()}); + err_code = InsertIndex(tx, new_fields, row); + } } - err_code = InsertIndex(tx, nfields, row); + return err_code; } diff --git a/storage/tianmu/index/tianmu_table_index.h b/storage/tianmu/index/tianmu_table_index.h index 725a9efac..6cb427ced 100644 --- a/storage/tianmu/index/tianmu_table_index.h +++ b/storage/tianmu/index/tianmu_table_index.h @@ -23,6 +23,7 @@ #include "rocksdb/db.h" #include "common/common_definitions.h" +#include "index/rdb_meta_manager.h" #include "index/rdb_utils.h" namespace Tianmu { @@ -58,6 +59,7 @@ class TianmuTableIndex final { common::ErrorCode UpdateIndex(core::Transaction *tx, std::string &nkey, std::string &okey, uint64_t row); common::ErrorCode DeleteIndex(core::Transaction *tx, std::vector &fields, uint64_t row); common::ErrorCode GetRowByKey(core::Transaction *tx, std::vector &fields, uint64_t &row); + index::IndexType type() { return idx_type_; } public: std::shared_ptr rocksdb_tbl_; @@ -67,6 +69,8 @@ class TianmuTableIndex final { uint keyid_ = 0; private: + index::IndexType idx_type_ = index::IndexType::INDEX_TYPE_PRIMARY; + common::ErrorCode CheckUniqueness(core::Transaction *tx, const rocksdb::Slice &pk_slice); }; diff --git a/storage/tianmu/vc/tianmu_attr.cpp b/storage/tianmu/vc/tianmu_attr.cpp index b4e0067b5..fa807d305 100644 --- a/storage/tianmu/vc/tianmu_attr.cpp +++ b/storage/tianmu/vc/tianmu_attr.cpp @@ -1094,20 +1094,40 @@ void TianmuAttr::UpdateData(uint64_t row, Value &old_v, Value &new_v) { // rclog << lock << "update data for row " << row << " col " << m_cid << // system::unlock; no_change = 
false; + core::Engine *eng = reinterpret_cast(tianmu_hton->data); + ASSERT(eng); auto pn = row2pack(row); FunctionExecutor fe([this, pn]() { LockPackForUse(pn); }, [this, pn]() { UnlockPackFromUse(pn); }); + // primary key process - UpdateIfIndex(nullptr, row, ColId(), old_v, new_v); + uint32_t colid = this->m_share->col_id; + auto owner = this->m_share->owner; + if (owner->s->primary_key == colid) { // if this col is defined as PK. Now, tianmu does not support unique key now. + common::ErrorCode err = UpdateIfIndex(nullptr, row, ColId(), old_v, new_v); + + if (eng && eng->getExtra() == HA_EXTRA_IGNORE_DUP_KEY) { // using `ingore` keyword + if (err == common::ErrorCode::DUPP_KEY) + return; + else if (err == common::ErrorCode::NOT_FOUND_KEY) + ; + else if (err == common::ErrorCode::FAILED) + ; + } else { // without `ignore` keywords. we throw a `DupKeyException` execption. + if (err == common::ErrorCode::DUPP_KEY) + throw common::DupKeyException(""); + else if (err == common::ErrorCode::NOT_FOUND_KEY) + ; + else if (err == common::ErrorCode::FAILED) + ; + } + } CopyPackForWrite(pn); auto &dpn = get_dpn(pn); auto dpn_save = dpn; - core::Engine *eng = reinterpret_cast(tianmu_hton->data); - assert(eng); - if (ct.Lookup() && new_v.HasValue()) { auto &str = new_v.GetString(); int code = m_dict->GetEncodedValue(str.data(), str.size()); @@ -1543,49 +1563,56 @@ std::shared_ptr TianmuAttr::GetFilter_Bloom() { FilterCoordinate(m_tid, m_cid, (int)FilterType::BLOOM, m_version.v1, m_version.v2), filter_creator)); } -void TianmuAttr::UpdateIfIndex(core::Transaction *tx, uint64_t row, uint64_t col, const Value &old_v, - const Value &new_v) { +common::ErrorCode TianmuAttr::UpdateIfIndex(core::Transaction *tx, uint64_t row, uint64_t col, const Value &old_v, + const Value &new_v) { + DBUG_ENTER("TianmuAttr::UpdateIfIndex"); + if (tx == nullptr) { tx = current_txn_; } + common::ErrorCode returnCode = common::ErrorCode::SUCCESS; core::Engine *eng = reinterpret_cast(tianmu_hton->data); 
- assert(eng); + ASSERT(eng); auto path = m_share->owner->Path(); std::shared_ptr tab = eng->GetTableIndex(path); // col is not primary key if (!tab) - return; + DBUG_RETURN(returnCode); std::vector keycols = tab->KeyCols(); if (std::find(keycols.begin(), keycols.end(), col) == keycols.end()) - return; + DBUG_RETURN(returnCode); - if (!new_v.HasValue()) - throw common::Exception("primary key not support null!"); + if (!new_v.HasValue()) { + returnCode = common::ErrorCode::UNSUPPORTED_DATATYPE; + TIANMU_LOG(LogCtl_Level::DEBUG, "primary key not support null!"); + DBUG_RETURN(returnCode); + } if (GetPackType() == common::PackType::STR) { auto &vnew = new_v.GetString(); auto &vold = old_v.GetString(); std::string nkey(vnew.data(), vnew.length()); std::string okey(vold.data(), vold.length()); - common::ErrorCode returnCode = tab->UpdateIndex(tx, nkey, okey, row); + returnCode = tab->UpdateIndex(tx, nkey, okey, row); if (returnCode == common::ErrorCode::DUPP_KEY || returnCode == common::ErrorCode::FAILED) { TIANMU_LOG(LogCtl_Level::DEBUG, "Duplicate entry: %s for primary key", vnew.data()); - throw common::DupKeyException("Duplicate entry: " + vnew + " for primary key"); + // throw common::DupKeyException("Duplicate entry: " + vnew + " for primary key"); } } else { // common::PackType::INT int64_t vnew = new_v.GetInt(); int64_t vold = old_v.GetInt(); std::string nkey(reinterpret_cast(&vnew), sizeof(int64_t)); std::string okey(reinterpret_cast(&vold), sizeof(int64_t)); - common::ErrorCode returnCode = tab->UpdateIndex(tx, nkey, okey, row); + returnCode = tab->UpdateIndex(tx, nkey, okey, row); if (returnCode == common::ErrorCode::DUPP_KEY || returnCode == common::ErrorCode::FAILED) { TIANMU_LOG(LogCtl_Level::DEBUG, "Duplicate entry :%" PRId64 " for primary key", vnew); - throw common::DupKeyException("Duplicate entry: " + std::to_string(vnew) + " for primary key"); + // throw common::DupKeyException("Duplicate entry: " + std::to_string(vnew) + " for primary key"); } } + 
return returnCode; } void TianmuAttr::DeleteByPrimaryKey(uint64_t row, uint64_t col) { diff --git a/storage/tianmu/vc/tianmu_attr.h b/storage/tianmu/vc/tianmu_attr.h index d5996c761..2129ea47c 100644 --- a/storage/tianmu/vc/tianmu_attr.h +++ b/storage/tianmu/vc/tianmu_attr.h @@ -93,7 +93,9 @@ class TianmuAttr final : public mm::TraceableObject, public PhysicalColumn, publ mm::TO_TYPE TraceableType() const override { return mm::TO_TYPE::TO_TEMPORARY; } void UpdateData(uint64_t row, Value &old_v, Value &new_v); void UpdateBatchData(core::Transaction *tx, const std::unordered_map> &rows); - void UpdateIfIndex(core::Transaction *tx, uint64_t row, uint64_t col, const Value &old_v, const Value &new_v); + common::ErrorCode UpdateIfIndex(core::Transaction *tx, uint64_t row, uint64_t col, const Value &old_v, + const Value &new_v); + void Truncate(); void DeleteData(uint64_t row); void DeleteByPrimaryKey(uint64_t row, uint64_t col); From 5606c87c796f85db519a99f60c915736280ca08e Mon Sep 17 00:00:00 2001 From: LiMK Date: Tue, 20 Jun 2023 17:11:08 +0800 Subject: [PATCH 03/10] ci(codecov): update the codecov config --- .github/codecov.yml => codecov.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/codecov.yml => codecov.yml (100%) diff --git a/.github/codecov.yml b/codecov.yml similarity index 100% rename from .github/codecov.yml rename to codecov.yml From 105b7300aae6a81e32de9987ab24431af8479544 Mon Sep 17 00:00:00 2001 From: LiMK Date: Tue, 20 Jun 2023 19:24:22 +0800 Subject: [PATCH 04/10] docs(intro): update the support for 8.0 --- .../current/00-about-stonedb/intro.md | 2 +- .../current/00-about-stonedb/limits.md | 2 +- .../current/09-FAQ/stonedb-faq.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/intro.md b/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/intro.md index 2b295ed94..a5f4493de 100644 --- 
a/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/intro.md +++ b/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/intro.md @@ -7,7 +7,7 @@ sidebar_position: 1.1 StoneDB 是由石原子科技公司自主设计、研发的国内首款基于 MySQL 内核打造的开源 HTAP(Hybrid Transactional and Analytical Processing)融合型数据库,可实现与 MySQL 的无缝切换。StoneDB 具备超高性能、实时分析等特点,为用户提供一站式 HTAP 解决方案。 -StoneDB 100% 兼容 MySQL 5.6、5.7 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法,支持 MySQL 生态中的系统工具和客户端,如 Navicat、Workbench、mysqldump、mydumper。由于 100% 兼容 MySQL,因此 StoneDB 的所有工作负载都可以继续使用 MySQL 数据库体系运行。 +StoneDB 100% 兼容 MySQL 5.6、5.7、8.0 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法,支持 MySQL 生态中的系统工具和客户端,如 Navicat、Workbench、mysqldump、mydumper。由于 100% 兼容 MySQL,因此 StoneDB 的所有工作负载都可以继续使用 MySQL 数据库体系运行。 StoneDB 专门针对 OLAP 应用程序进行了设计和优化,支持百亿数据场景下进行高性能、多维度字段组合的复杂查询,相对比社区版的 MySQL,其查询速度提升了十倍以上。 diff --git a/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/limits.md b/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/limits.md index 43d6acdac..a256ace28 100644 --- a/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/limits.md +++ b/website/i18n/zh/docusaurus-plugin-content-docs/current/00-about-stonedb/limits.md @@ -4,7 +4,7 @@ sidebar_position: 1.3 --- # 使用限制 -StoneDB 100% 兼容 MySQL 5.6、5.7 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法,但由于 StoneDB 本身的一些特性,部分操作和功能尚未得到支持,以下列出的是不兼容 MySQL 的操作和功能。 +StoneDB 100% 兼容 MySQL 5.6、5.7、8.0 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法,但由于 StoneDB 本身的一些特性,部分操作和功能尚未得到支持,以下列出的是不兼容 MySQL 的操作和功能。 # 不支持的DDL 1. optimize table diff --git a/website/i18n/zh/docusaurus-plugin-content-docs/current/09-FAQ/stonedb-faq.md b/website/i18n/zh/docusaurus-plugin-content-docs/current/09-FAQ/stonedb-faq.md index 542f6aeac..f9c34880e 100644 --- a/website/i18n/zh/docusaurus-plugin-content-docs/current/09-FAQ/stonedb-faq.md +++ b/website/i18n/zh/docusaurus-plugin-content-docs/current/09-FAQ/stonedb-faq.md @@ -6,7 +6,7 @@ sidebar_position: 10.2 # 产品FAQ ## StoneDB与MySQL的兼容性如何? 
-StoneDB 高度兼容 MySQL 5.6、5.7 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法。 +StoneDB 高度兼容 MySQL 5.6、5.7、8.0 协议和 MySQL 生态等重要特性,支持 MySQL 常用的功能及语法。 由于 StoneDB 本身的一些特性,部分操作和功能尚未得到支持,如不支持创建索引、不支持删除数据等。 ## StoneDB有自己的优化器吗? StoneDB 是在原生的 MySQL 加入的存储引擎,StoneDB 有自己的优化器,但 StoneDB 实际也会利用原生的 MySQL 的优化器做一些查询解析和重写机制。 From 186f6946d7f3ba17256d4968f12e7c14f8a2753a Mon Sep 17 00:00:00 2001 From: LiMK Date: Sun, 25 Jun 2023 16:14:42 +0800 Subject: [PATCH 05/10] workflow(codecov): Filter out excess code files --- .github/workflows/pull_requests.yml | 4 +++- codecov.yml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml index 565be0624..08917c88f 100644 --- a/.github/workflows/pull_requests.yml +++ b/.github/workflows/pull_requests.yml @@ -133,8 +133,10 @@ jobs: - name: create coverage.info run: | + cat codecov.yml | curl --data-binary @- https://codecov.io/validate cd build - lcov --capture --directory . --output-file coverage.info --test-name coverage + lcov --capture --directory . 
--output-file coverage_tmp.info --test-name coverage + lcov --remove coverage_tmp.info '*/boost_1_66_0/*' '*/extra/*' -o coverage.info - name: Codecov uses: codecov/codecov-action@v3.1.4 diff --git a/codecov.yml b/codecov.yml index f3547a439..45337d73b 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,3 +1,3 @@ ignore: -- ^stonedb/include/boost_1_66_0.* -- ^stonedb/extra.* \ No newline at end of file +- "include/boost_1_66_0" +- "extra" \ No newline at end of file From 7d03695b8a8554b35ec78e31b952c2ac1de6cdd5 Mon Sep 17 00:00:00 2001 From: LiMK Date: Mon, 26 Jun 2023 15:08:59 +0800 Subject: [PATCH 06/10] workflow(coverage): Update the lcov running logic --- .github/workflows/pull_requests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml index 08917c88f..cabfc9cac 100644 --- a/.github/workflows/pull_requests.yml +++ b/.github/workflows/pull_requests.yml @@ -135,8 +135,7 @@ jobs: run: | cat codecov.yml | curl --data-binary @- https://codecov.io/validate cd build - lcov --capture --directory . --output-file coverage_tmp.info --test-name coverage - lcov --remove coverage_tmp.info '*/boost_1_66_0/*' '*/extra/*' -o coverage.info + lcov --capture --directory . --output-file coverage.info --test-name coverage - name: Codecov uses: codecov/codecov-action@v3.1.4 From 73dcfec8b8cc4af83a880324a5cd1f019b107a88 Mon Sep 17 00:00:00 2001 From: Double0101 Date: Mon, 19 Jun 2023 14:59:15 +0800 Subject: [PATCH 07/10] fix(tianmu): default value of the field takes no effect in load #1865 Cause: in the function ParsingStrategy::ParseResult ParsingStrategy::GetOneRow field->val_str(str) cannot distinguish 0 and NULL value. Solution: Check whether field's default value is NULL. 
--- mysql-test/suite/tianmu/r/issue1865.result | 28 +++++++++++++++++++ .../suite/tianmu/std_data/issue1865.dat | 2 ++ mysql-test/suite/tianmu/t/issue1865.test | 27 ++++++++++++++++++ storage/tianmu/loader/parsing_strategy.cpp | 9 ++++-- 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 mysql-test/suite/tianmu/r/issue1865.result create mode 100644 mysql-test/suite/tianmu/std_data/issue1865.dat create mode 100644 mysql-test/suite/tianmu/t/issue1865.test diff --git a/mysql-test/suite/tianmu/r/issue1865.result b/mysql-test/suite/tianmu/r/issue1865.result new file mode 100644 index 000000000..0db382140 --- /dev/null +++ b/mysql-test/suite/tianmu/r/issue1865.result @@ -0,0 +1,28 @@ +DROP DATABASE IF EXISTS issue1865_test_db; +CREATE DATABASE issue1865_test_db; +create table t1 (a int default 100, b int, c varchar(60))engine=tianmu; +load data infile 'MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; +a b c +NULL NULL 10 +NULL NULL 15 +alter table t1 alter column b drop default; +alter table t1 alter column b set default 10; +load data infile 'MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; +a b c +NULL NULL 10 +NULL NULL 15 +NULL 10 10 +NULL 10 15 +alter table t1 modify c text; +load data infile 'MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; +a b c +NULL NULL 10 +NULL NULL 15 +NULL 10 10 +NULL 10 15 +NULL 10 10 +NULL 10 15 +DROP DATABASE issue1865_test_db; diff --git a/mysql-test/suite/tianmu/std_data/issue1865.dat b/mysql-test/suite/tianmu/std_data/issue1865.dat new file mode 100644 index 000000000..a70a059c2 --- /dev/null +++ b/mysql-test/suite/tianmu/std_data/issue1865.dat @@ -0,0 +1,2 @@ +\N 10 +\N 15 diff --git a/mysql-test/suite/tianmu/t/issue1865.test b/mysql-test/suite/tianmu/t/issue1865.test new file mode 100644 index 000000000..b832ac6e1 --- /dev/null +++ b/mysql-test/suite/tianmu/t/issue1865.test @@ -0,0 
+1,27 @@ +--source include/have_tianmu.inc + +--disable_warnings +DROP DATABASE IF EXISTS issue1865_test_db; +--enable_warnings + +CREATE DATABASE issue1865_test_db; + +create table t1 (a int default 100, b int, c varchar(60))engine=tianmu; + +--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR +eval load data infile '$MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; + +alter table t1 alter column b drop default; +alter table t1 alter column b set default 10; + +--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR +eval load data infile '$MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; + +alter table t1 modify c text; +--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR +eval load data infile '$MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); +select * from t1; + +DROP DATABASE issue1865_test_db; diff --git a/storage/tianmu/loader/parsing_strategy.cpp b/storage/tianmu/loader/parsing_strategy.cpp index fd53e8e3a..e2d4f9b6c 100644 --- a/storage/tianmu/loader/parsing_strategy.cpp +++ b/storage/tianmu/loader/parsing_strategy.cpp @@ -433,9 +433,12 @@ ParsingStrategy::ParseResult ParsingStrategy::GetOneRow(const char *const buf, s if (!first_row_prepared_) { std::string field_name(field->field_name); - str = new (thd_->mem_root) String(MAX_FIELD_WIDTH); - String *res = field->val_str(str); - DEBUG_ASSERT(res); + str = new (thd_->mem_root) String(); + if (!field->is_null()) { + str->real_alloc(MAX_FIELD_WIDTH); + String *res = field->val_str(str); + DEBUG_ASSERT(res); + } vec_field_Str_list_.push_back(str); vec_field_num_to_index_.push_back(0); map_field_name_to_index_[field_name] = i; From a4600c4515ebc9b917b92d526a4393bc21ee0e99 Mon Sep 17 00:00:00 2001 From: RingsC Date: Thu, 15 Jun 2023 21:10:49 +0800 Subject: [PATCH 08/10] fix(tianmu): To support union(all) the statement which is without from clause 1: To fixup unsupport union or union all a sql statement which is 
without from clause. 2: Re-format some codes and functions. --- mysql-test/suite/tianmu/r/issue1860.result | 55 + mysql-test/suite/tianmu/t/issue1860.test | 43 + storage/tianmu/core/engine_execute.cpp | 130 +- storage/tianmu/core/just_a_table.h | 4 + storage/tianmu/core/query.cpp | 37 +- storage/tianmu/core/query_compile.cpp | 2674 +++++++++-------- storage/tianmu/core/temp_table.cpp | 210 +- storage/tianmu/core/temp_table.h | 9 +- storage/tianmu/core/temp_table_com.cpp | 16 +- storage/tianmu/core/temp_table_low.cpp | 51 +- storage/tianmu/handler/ha_tianmu.cpp | 2 +- storage/tianmu/index/multi_index.cpp | 993 +++--- storage/tianmu/index/multi_index.h | 373 ++- .../optimizer/compile/compiled_query.cpp | 5 +- .../tianmu/optimizer/compile/compiled_query.h | 2 +- storage/tianmu/sql/ha_my_tianmu.cpp | 11 +- storage/tianmu/sql/ha_my_tianmu.h | 5 +- storage/tianmu/vc/column_bin_encoder.cpp | 6 +- 18 files changed, 2411 insertions(+), 2215 deletions(-) create mode 100644 mysql-test/suite/tianmu/r/issue1860.result create mode 100644 mysql-test/suite/tianmu/t/issue1860.test diff --git a/mysql-test/suite/tianmu/r/issue1860.result b/mysql-test/suite/tianmu/r/issue1860.result new file mode 100644 index 000000000..8486c0bed --- /dev/null +++ b/mysql-test/suite/tianmu/r/issue1860.result @@ -0,0 +1,55 @@ +DROP DATABASE IF EXISTS issue1860_test; +CREATE DATABASE issue1860_test; +USE issue1860_test; +CREATE TABLE tt(id decimal(18,0), dt datetime) ENGINE =TIANMU; +INSERT INTO tt VALUES(1111.0, '2023-01-01'); +SELECT id FROM tt UNION SELECT 2222 c1 FROM dual; +id +1111 +2222 +INSERT INTO tt VALUES(2222.0, '2023-02-02'); +SELECT id FROM tt UNION SELECT 2222 c1 FROM dual; +id +1111 +2222 +SELECT id FROM tt UNION ALL SELECT 2222 c1 FROM dual; +id +1111 +2222 +2222 +SELECT id FROM tt UNION SELECT 2222 ; +id +1111 +2222 +sELECT id FROM tt UNION ALL SELECT 2222; +id +1111 +2222 +2222 +SELECT id, dt FROM tt UNION SELECT 2222, '2022-01-01'; +id dt +1111 2023-01-01 00:00:00 +2222 2023-02-02 
00:00:00 +2222 2022-01-01 +SELECT id, dt FROM tt UNION SELECT 2222, str_to_date('2022-02-03', '%Y-%m-%d'); +id dt +1111 2023-01-01 00:00:00 +2222 2023-02-02 00:00:00 +2222 2022-02-03 00:00:00 +SELECT id, dt FROM tt UNION SELECT 2222, str_to_date('2023-02-02', '%Y-%m-%d'); +id dt +1111 2023-01-01 00:00:00 +2222 2023-02-02 00:00:00 +SELECT dt FROM tt UNION SELECT 2222; +ERROR HY000: wrong types of columns +SELECT dt FROM tt UNION SELECT '2222'; +dt +2023-01-01 00:00:00 +2023-02-02 00:00:00 +2222 +SELECT * FROM tt UNION SELECT 222; +ERROR 21000: The used SELECT statements have a different number of columns +SELECT * FROM tt UNION ALL SELECT 222; +ERROR 21000: The used SELECT statements have a different number of columns +DROP TABLE tt; +DROP DATABASE issue1860_test; diff --git a/mysql-test/suite/tianmu/t/issue1860.test b/mysql-test/suite/tianmu/t/issue1860.test new file mode 100644 index 000000000..3077ad01d --- /dev/null +++ b/mysql-test/suite/tianmu/t/issue1860.test @@ -0,0 +1,43 @@ +--source include/have_tianmu.inc + +--disable_warnings +DROP DATABASE IF EXISTS issue1860_test; + +CREATE DATABASE issue1860_test; +USE issue1860_test; +--enable_warnings + + +CREATE TABLE tt(id decimal(18,0), dt datetime) ENGINE =TIANMU; +INSERT INTO tt VALUES(1111.0, '2023-01-01'); + +SELECT id FROM tt UNION SELECT 2222 c1 FROM dual; + +INSERT INTO tt VALUES(2222.0, '2023-02-02'); + +SELECT id FROM tt UNION SELECT 2222 c1 FROM dual; +SELECT id FROM tt UNION ALL SELECT 2222 c1 FROM dual; + +SELECT id FROM tt UNION SELECT 2222 ; +sELECT id FROM tt UNION ALL SELECT 2222; + +SELECT id, dt FROM tt UNION SELECT 2222, '2022-01-01'; +SELECT id, dt FROM tt UNION SELECT 2222, str_to_date('2022-02-03', '%Y-%m-%d'); +SELECT id, dt FROM tt UNION SELECT 2222, str_to_date('2023-02-02', '%Y-%m-%d'); + +#ERROR 1105 (HY000): wrong types of columns +--error 1105 +SELECT dt FROM tt UNION SELECT 2222; +SELECT dt FROM tt UNION SELECT '2222'; + +#ERROR 1222 (21000): The used SELECT statements have a 
different number of columns +--error 1222 +SELECT * FROM tt UNION SELECT 222; +--error 1222 +SELECT * FROM tt UNION ALL SELECT 222; + + +DROP TABLE tt; + +DROP DATABASE issue1860_test; + diff --git a/storage/tianmu/core/engine_execute.cpp b/storage/tianmu/core/engine_execute.cpp index 0f8fd87cc..78fe836bd 100644 --- a/storage/tianmu/core/engine_execute.cpp +++ b/storage/tianmu/core/engine_execute.cpp @@ -74,12 +74,9 @@ class KillTimer { /* Handles a single query -If an error appears during query preparation/optimization -query structures are cleaned up and the function returns information about the -error through res'. If the query can not be compiled by Tianmu engine -QueryRouteTo::kToMySQL is returned and MySQL engine continues query -execution. -*/ +If an error appears during query preparation/optimization query structures are cleaned up and the function returns +information about the error through res'. If the query can not be compiled by Tianmu engine QueryRouteTo::kToMySQL is +returned and MySQL engine continues query execution.*/ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulong setup_tables_done_option, int &res, int &is_optimize_after_tianmu, int &tianmu_free_join, int with_insert) { KillTimer timer(thd, tianmu_sysvar_max_execution_time); @@ -117,14 +114,12 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo core::Engine *eng = reinterpret_cast(tianmu_hton->data); QueryRouteTo route = QueryRouteTo::kToTianmu; SELECT_LEX *save_current_select = lex->current_select(); - List derived_optimized; // collection to remember derived - // tables that are optimized + List derived_optimized; // collection to remember derived tables that are optimized + if (thd->fill_derived_tables() && lex->derived_tables) { - // Derived tables are processed completely in the function - // open_and_lock_tables(...). To avoid execution of derived tables in - // open_and_lock_tables(...) 
the function mysql_derived_filling(..) - // optimizing and executing derived tables is passed over, then optimization - // of derived tables must go here. + // Derived tables are processed completely in the function open_and_lock_tables(...). To avoid execution of derived + // tables in open_and_lock_tables(...) the function mysql_derived_filling(..) optimizing and executing derived + // tables is passed over, then optimization of derived tables must go here. res = FALSE; int tianmu_free_join = FALSE; lex->thd->derived_tables_processing = TRUE; @@ -135,16 +130,15 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo // optimize derived table SELECT_LEX *first_select = cursor->derived_unit()->first_select(); if (first_select->next_select() && first_select->next_select()->linkage == UNION_TYPE) { //?? only if union - if (lex->is_explain() || cursor->derived_unit()->item) { //??called for explain - // OR there is subselect(?) + if (lex->is_explain() || cursor->derived_unit()->item) { //??called for explain OR there is subselect(?) route = QueryRouteTo::kToMySQL; goto ret_derived; } + if (!cursor->derived_unit()->is_executed() || - cursor->derived_unit()->uncacheable) { //??not already executed (not - // materialized?) - // OR not cacheable (meaning not yet in cache, i.e. not - // materialized it seems to boil down to NOT MATERIALIZED(?) + cursor->derived_unit()->uncacheable) { //??not already executed (not materialized?) + // OR not cacheable (meaning not yet in cache, i.e. not materialized it seems to boil down to NOT + // MATERIALIZED(?) 
res = cursor->derived_unit()->optimize_for_tianmu(); //===exec() derived_optimized.push_back(cursor->derived_unit()); } @@ -152,17 +146,21 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo cursor->derived_unit()->set_limit(first_select); if (cursor->derived_unit()->select_limit_cnt == HA_POS_ERROR) first_select->remove_base_options(OPTION_FOUND_ROWS); + lex->set_current_select(first_select); int optimize_derived_after_tianmu = FALSE; res = optimize_select( thd, ulong(first_select->active_options() | thd->variables.option_bits | SELECT_NO_UNLOCK), (Query_result *)cursor->derived_result, first_select, optimize_derived_after_tianmu, tianmu_free_join); + if (optimize_derived_after_tianmu) derived_optimized.push_back(cursor->derived_unit()); } + lex->set_current_select(save_current_select); if (!res && tianmu_free_join) // no error & route = QueryRouteTo::kToMySQL; + if (res || route == QueryRouteTo::kToMySQL) goto ret_derived; } @@ -172,6 +170,7 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo se = dynamic_cast(result); if (se != nullptr) result = new exporter::select_tianmu_export(se); + // prepare, optimize and execute the main query select_lex = lex->select_lex; unit = lex->unit; @@ -179,11 +178,9 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo if (!(res = unit->prepare(thd, result, (ulong)(SELECT_NO_UNLOCK | setup_tables_done_option), 0))) { // similar to mysql_union(...) 
from sql_union.cpp - /* FIXME: create_table is private in mysql5.6 - select_create* sc = dynamic_cast(result); - if (sc && sc->create_table->table && sc->create_table->table->db_stat - != 0) { my_error(ER_TABLE_EXISTS_ERROR, MYF(0), - sc->create_table->table_name); res = 1; } else + /* FIXME: create_table is private in mysql5.6 select_create* sc = dynamic_cast(result); + if (sc && sc->create_table->table && sc->create_table->table->db_stat != 0) { my_error(ER_TABLE_EXISTS_ERROR, + MYF(0), sc->create_table->table_name); res = 1; } else */ if (lex->is_explain() || unit->item) // explain or sth was already computed - go to mysql route = QueryRouteTo::kToMySQL; @@ -196,15 +193,10 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo route = eng->Execute(unit->thd, unit->thd->lex, result, unit); if (route == QueryRouteTo::kToMySQL) { if (in_case_of_failure_can_go_to_mysql) - if (old_executed) - unit->set_executed(); - else - unit->reset_executed(); - + (old_executed) ? unit->set_executed() : unit->reset_executed(); else { const char *err_msg = - "Error: Query syntax not implemented in Tianmu, can " - "export " + "Error: Query syntax not implemented in Tianmu, can export " "only to MySQL format (set TIANMU_DATAFORMAT to 'MYSQL')."; TIANMU_LOG(LogCtl_Level::ERROR, err_msg); my_message(ER_SYNTAX_ERROR, err_msg, MYF(0)); @@ -218,37 +210,34 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo } } } + if (res || route == QueryRouteTo::kToTianmu) { res |= (int)unit->cleanup(0); is_optimize_after_tianmu = FALSE; } } else { - unit->set_limit(unit->global_parameters()); // the fragment of original - // handle_select(...) - //(until the first part of optimization) - // used for non-union select + unit->set_limit(unit->global_parameters()); // the fragment of original handle_select(...) 
+ //(until the first part of optimization) used for non-union select - //'options' of mysql_select will be set in JOIN, as far as JOIN for - // every PS/SP execution new, we will not need reset this flag if - // setup_tables_done_option changed for next rexecution + //'options' of mysql_select will be set in JOIN, as far as JOIN for every PS/SP execution new, we will not need + // reset this flag if setup_tables_done_option changed for next rexecution int err; err = optimize_select(thd, ulong(select_lex->active_options() | thd->variables.option_bits | setup_tables_done_option), result, select_lex, is_optimize_after_tianmu, tianmu_free_join); - // RCBase query engine entry point + // query engine entry point if (!err) { try { route = Execute(thd, lex, result); if (route == QueryRouteTo::kToMySQL && !in_case_of_failure_can_go_to_mysql) { TIANMU_LOG(LogCtl_Level::ERROR, - "Error: Query syntax not implemented in Tianmu, can export " - "only to MySQL format (set TIANMU_DATAFORMAT to 'MYSQL')."); + "Error: Query syntax not implemented in Tianmu, can export only to MySQL format (set " + "TIANMU_DATAFORMAT to 'MYSQL')."); my_message(ER_SYNTAX_ERROR, - "Query syntax not implemented in Tianmu, can export only " - "to MySQL " - "format (set TIANMU_DATAFORMAT to 'MYSQL').", + "Query syntax not implemented in Tianmu, can export only to MySQL format (set TIANMU_DATAFORMAT " + "to 'MYSQL').", MYF(0)); throw ReturnMeToMySQLWithError(); } @@ -271,17 +260,16 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo } if (select_lex->join && Query::IsLOJ(select_lex->join_list)) is_optimize_after_tianmu = TRUE; // optimize partially (phase=Doneoptimization), since part of LOJ - // optimization was already done - res |= (int)thd->is_error(); // the ending of original handle_select(...) */ + // optimization was already done // the ending of original handle_select(...) 
*/ + res |= (int)thd->is_error(); if (unlikely(res)) { // If we had a another error reported earlier then this will be ignored // result->send_error(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR)); result->abort_result_set(); } if (se != nullptr) { - // free the tianmu export object, - // restore the original mysql export object - // and prepare if it is expected to be prepared + // free the tianmu export object, restore the original mysql export object and prepare if it is expected to be + // prepared if (!select_lex->next_select() && select_lex->join != 0 && select_lex->query_result() == result) { select_lex->set_query_result(se); if (((exporter::select_tianmu_export *)result)->IsPrepared()) @@ -292,9 +280,8 @@ QueryRouteTo Engine::HandleSelect(THD *thd, LEX *lex, Query_result *&result, ulo result = se; } ret_derived: - // if the query is redirected to MySQL engine - // optimization of derived tables must be completed - // and derived tables must be filled + // if the query is redirected to MySQL engine optimization of derived tables must be completed and derived tables must + // be filled if (route == QueryRouteTo::kToMySQL) { for (SELECT_LEX *sl = lex->all_selects_list; sl; sl = sl->next_select_in_list()) for (TABLE_LIST *cursor = sl->get_table_list(); cursor; cursor = cursor->next_local) @@ -346,12 +333,14 @@ int optimize_select(THD *thd, ulong select_options, Query_result *result, SELECT } if (!(join = new JOIN(thd, select_lex))) return TRUE; /* purecov: inspected */ + select_lex->set_join(join); } join->best_rowcount = 2; is_optimize_after_tianmu = TRUE; if ((err = join->optimize(OptimizePhase::Before_LOJ_Transform))) return err; + return FALSE; } @@ -361,6 +350,7 @@ QueryRouteTo Engine::Execute(THD *thd, LEX *lex, Query_result *result_output, SE DEBUG_ASSERT(thd->lex == lex); SELECT_LEX *selects_list = lex->select_lex; SELECT_LEX *last_distinct = nullptr; + if (unit_for_union != nullptr) last_distinct = unit_for_union->union_distinct; @@ -492,6 +482,7 @@ 
QueryRouteTo Engine::Execute(THD *thd, LEX *lex, Query_result *result_output, SE std::string table_path = Engine::GetTablePath(((Query_tables_list *)lex)->query_tables->table); rct = current_txn_->GetTableByPathIfExists(table_path); } + if ((unit_for_union != nullptr) && (lex->sql_command != SQLCOM_CREATE_TABLE)) { // for exclude CTAS int res = result_output->prepare(unit_for_union->item_list, unit_for_union); if (res) { @@ -499,15 +490,16 @@ QueryRouteTo Engine::Execute(THD *thd, LEX *lex, Query_result *result_output, SE my_message(ER_UNKNOWN_ERROR, "Tianmu: unsupported UNION", MYF(0)); throw ReturnMeToMySQLWithError(); } - if (export_file_name) - sender.reset(new ResultExportSender(unit_for_union->thd, result_output, unit_for_union->item_list)); - else - sender.reset(new ResultSender(unit_for_union->thd, result_output, unit_for_union->item_list)); + + sender.reset(export_file_name + ? new ResultExportSender(unit_for_union->thd, result_output, unit_for_union->item_list) + : new ResultSender(unit_for_union->thd, result_output, unit_for_union->item_list)); + } else { - if (export_file_name) - sender.reset(new ResultExportSender(selects_list->master_unit()->thd, result_output, selects_list->item_list)); - else - sender.reset(new ResultSender(selects_list->master_unit()->thd, result_output, selects_list->item_list)); + sender.reset( + export_file_name + ? new ResultExportSender(selects_list->master_unit()->thd, result_output, selects_list->item_list) + : new ResultSender(selects_list->master_unit()->thd, result_output, selects_list->item_list)); } TempTable *result = query.Preexecute(cqu, sender.get()); @@ -529,6 +521,7 @@ QueryRouteTo Engine::Execute(THD *thd, LEX *lex, Query_result *result_output, SE rct.reset(); } sender.reset(); + } catch (...) 
{ bool with_error = false; if (sender) { @@ -540,6 +533,7 @@ QueryRouteTo Engine::Execute(THD *thd, LEX *lex, Query_result *result_output, SE } return (handle_exceptions(thd, current_txn_, with_error)); } + return QueryRouteTo::kToTianmu; } @@ -551,12 +545,12 @@ QueryRouteTo handle_exceptions(THD *thd, Transaction *cur_connection, bool with_ throw; } catch (common::NotImplementedException const &x) { tianmu_control_.lock(cur_connection->GetThreadID()) << "Switched to MySQL: " << x.what() << system::unlock; - my_message(ER_UNKNOWN_ERROR, - (std::string("The query includes syntax that is not supported " - "by the storage engine. Tianmu: ") + - x.what()) - .c_str(), - MYF(0)); + my_message( + ER_UNKNOWN_ERROR, + (std::string("The query includes syntax that is not supported by the storage engine. Tianmu: ") + x.what()) + .c_str(), + MYF(0)); + if (with_error) { std::string msg(x.what()); msg.append(" Can't switch to MySQL execution path"); @@ -598,6 +592,7 @@ QueryRouteTo handle_exceptions(THD *thd, Transaction *cur_connection, bool with_ } return QueryRouteTo::kToMySQL; } + } // namespace core } // namespace Tianmu @@ -648,8 +643,7 @@ int st_select_lex_unit::optimize_for_tianmu() { set_limit(sl); if (sl == global_parameters() || thd->lex->is_explain()) { offset_limit_cnt = 0; - // We can't use LIMIT at this stage if we are using ORDER BY for the - // whole query + // We can't use LIMIT at this stage if we are using ORDER BY for the whole query if (sl->order_list.first || thd->lex->is_explain()) select_limit_cnt = HA_POS_ERROR; } diff --git a/storage/tianmu/core/just_a_table.h b/storage/tianmu/core/just_a_table.h index ab445b7bc..a73e7d81b 100644 --- a/storage/tianmu/core/just_a_table.h +++ b/storage/tianmu/core/just_a_table.h @@ -28,7 +28,10 @@ namespace types { class BString; } // namespace types namespace core { + enum class TType { TABLE, TEMP_TABLE }; +enum class TableSubType : int { UN_KNOWN = 0, DUAL, CONST, NORMAL }; + class Transaction; class PhysicalColumn; 
class Filter; @@ -58,6 +61,7 @@ class JustATable : public std::enable_shared_from_this { virtual const ColumnType &GetColumnType(int n_a) = 0; virtual uint32_t Getpackpower() const = 0; + //! Returns column value in the form required by complex expressions ValueOrNull GetComplexValue(const int64_t obj, const int attr); diff --git a/storage/tianmu/core/query.cpp b/storage/tianmu/core/query.cpp index 07a6c1047..b86d9e28c 100644 --- a/storage/tianmu/core/query.cpp +++ b/storage/tianmu/core/query.cpp @@ -586,26 +586,18 @@ TempTable *Query::Preexecute(CompiledQuery &qu, ResultSender *sender, [[maybe_un std::shared_ptr t1_ptr, t2_ptr, t3_ptr; if (step.t1.n != common::NULL_VALUE_32) { - if (step.t1.n >= 0) - t1_ptr = Table(step.t1.n); // normal table - else { - t1_ptr = ta[-step.t1.n - 1]; // TempTable - } + // normal table or TempTable + t1_ptr = (step.t1.n >= 0) ? Table(step.t1.n) : ta[-step.t1.n - 1]; } + if (step.t2.n != common::NULL_VALUE_32) { - if (step.t2.n >= 0) - t2_ptr = Table(step.t2.n); // normal table - else { - t2_ptr = ta[-step.t2.n - 1]; // TempTable - } + t2_ptr = (step.t2.n >= 0) ? Table(step.t2.n) : ta[-step.t2.n - 1]; } + if (step.t3.n != common::NULL_VALUE_32) { - if (step.t3.n >= 0) - t3_ptr = Table(step.t3.n); // normal table - else { - t3_ptr = ta[-step.t3.n - 1]; // TempTable - } + t3_ptr = (step.t3.n >= 0) ? Table(step.t3.n) : ta[-step.t3.n - 1]; } + // Some technical information if (step.alias && std::strcmp(step.alias, "roughstats") == 0) { // magical word (passed as table alias) to display statistics @@ -623,13 +615,18 @@ TempTable *Query::Preexecute(CompiledQuery &qu, ResultSender *sender, [[maybe_un case CompiledQuery::StepType::TABLE_ALIAS: ta[-step.t1.n - 1] = t2_ptr; break; - case CompiledQuery::StepType::TMP_TABLE: + case CompiledQuery::StepType::TMP_TABLE: { DEBUG_ASSERT(step.t1.n < 0); - ta[-step.t1.n - 1] = step.n1 - ? 
TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this, true) - : TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this); + TableSubType sub_type = TableSubType::NORMAL; + if (step.n2 == static_cast::type>(TableSubType::DUAL)) { + sub_type = TableSubType::DUAL; + } + + ta[-step.t1.n - 1] = + step.n1 ? TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this, sub_type, true) + : TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this, sub_type); ((TempTable *)ta[-step.t1.n - 1].get())->ReserveVirtColumns(qu.NumOfVirtualColumns(step.t1)); - break; + } break; case CompiledQuery::StepType::CREATE_CONDS: DEBUG_ASSERT(step.t1.n < 0); if (step.ex_op == common::ExtraOperation::EX_COND_PUSH) { diff --git a/storage/tianmu/core/query_compile.cpp b/storage/tianmu/core/query_compile.cpp index 6781fccc9..0ce07e4e2 100644 --- a/storage/tianmu/core/query_compile.cpp +++ b/storage/tianmu/core/query_compile.cpp @@ -1,1332 +1,1342 @@ -/* Copyright (c) 2022 StoneAtom, Inc. All rights reserved. - Use is subject to license terms - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA -*/ - -#include - -#include "common/mysql_gate.h" -#include "core/engine.h" -#include "core/mysql_expression.h" -#include "core/query.h" -#include "core/transaction.h" -#include "optimizer/compile/compilation_tools.h" -#include "optimizer/compile/compiled_query.h" - -namespace Tianmu { -namespace core { -QueryRouteTo TableUnmysterify(TABLE_LIST *tab, const char *&database_name, const char *&table_name, - const char *&table_alias, const char *&table_path) { - ASSERT_MYSQL_STRING(tab->table->s->db); - ASSERT_MYSQL_STRING(tab->table->s->table_name); - ASSERT_MYSQL_STRING(tab->table->s->path); - - database_name = tab->table->s->db.str; - if (tab->referencing_view) - table_name = tab->referencing_view->table_name; - else - table_name = tab->table->s->table_name.str; - table_alias = tab->alias; - table_path = tab->table->s->path.str; - - return QueryRouteTo::kToTianmu; -} - -QueryRouteTo JudgeErrors(SELECT_LEX *sl) { - if (!sl->join) { - return QueryRouteTo::kToMySQL; - } - - /* gone with mysql5.6 - if(sl->join && sl->join->procedure) { - my_message(ER_SYNTAX_ERROR, "Tianmu specific error: No PROCEDURE - syntax supported", MYF(0)); throw ReturnMeToMySQLWithError(); - } - */ - - if (sl->offset_limit) - if (sl->offset_limit->type() != Item::INT_ITEM /*|| sl->offset_limit->val_int()*/) { - my_message(ER_SYNTAX_ERROR, "Tianmu specific error: Only numerical OFFSET supported", MYF(0)); - throw ReturnMeToMySQLWithError(); - } - - if (sl->select_limit) - if (sl->select_limit->type() != Item::INT_ITEM) { - my_message(ER_SYNTAX_ERROR, "Tianmu specific error: Only numerical LIMIT supported", MYF(0)); - throw ReturnMeToMySQLWithError(); - } - if (sl->olap == ROLLUP_TYPE) { - /*my_message(ER_SYNTAX_ERROR, "Tianmu specific error: WITH ROLLUP not - supported", MYF(0)); throw 
ReturnMeToMySQLWithError();*/ - return QueryRouteTo::kToMySQL; - } - - return QueryRouteTo::kToTianmu; -} - -void SetLimit(SELECT_LEX *sl, SELECT_LEX *gsl, int64_t &offset_value, int64_t &limit_value) { - if (sl->select_limit && (!gsl || sl->select_limit != gsl->select_limit)) { - limit_value = sl->select_limit->val_int(); - if (limit_value == UINT_MAX) { /* this value seems to be sometimes - automatically set by MYSQL to UINT_MAX*/ - limit_value = -1; // no limit set - offset_value = -1; - }; - }; - - if (limit_value) - offset_value = 0; - - if (sl->offset_limit && (!gsl || sl->offset_limit != gsl->offset_limit)) - offset_value = sl->offset_limit->val_int(); -} - -// Used in Query::Compile() to break compilation in the middle and make cleanup -// before returning -class CompilationError {}; - -QueryRouteTo Query::FieldUnmysterify(Item *item, const char *&database_name, const char *&table_name, - const char *&table_alias, const char *&table_path, const TABLE *&table_ptr, - const char *&field_name, const char *&field_alias) { - table_alias = EMPTY_TABLE_CONST_INDICATOR; - database_name = nullptr; - table_name = nullptr; - table_path = nullptr; - table_ptr = nullptr; - field_name = nullptr; - field_alias = nullptr; - - item = UnRef(item); - - Item_field *ifield; - switch (static_cast(item->type())) { - case static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM): - ifield = dynamic_cast(item)->OriginalItem(); - if (IsAggregationItem(ifield)) { - Item_sum *is = (Item_sum *)ifield; - if (is->get_arg_count() > 1) - return QueryRouteTo::kToMySQL; - Item *tmp_item = UnRef(is->get_arg(0)); - if (tmp_item->type() == Item::FIELD_ITEM) - ifield = (Item_field *)tmp_item; - else if (static_cast(tmp_item->type()) == - static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM)) - ifield = dynamic_cast(tmp_item)->OriginalItem(); - else { - return QueryRouteTo::kToMySQL; - } - } - break; - case Item::FIELD_ITEM: // regular select - ifield = (Item_field 
*)item; - break; - - case Item::SUM_FUNC_ITEM: { // min(k), max(k), count(), avg(k), sum - Item_sum *is = (Item_sum *)item; - if (is->get_arg_count() > 1) { - return QueryRouteTo::kToMySQL; - } - Item *tmp_item = UnRef(is->get_arg(0)); - if (tmp_item->type() == Item::FIELD_ITEM) - ifield = (Item_field *)tmp_item; - else if (static_cast(tmp_item->type()) == - static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM)) /* *CAUTION* comparision of - enumerators from different - enums */ - ifield = dynamic_cast(tmp_item)->OriginalItem(); - else - return QueryRouteTo::kToMySQL; - break; - } - case Item::FUNC_ITEM: // complex expressions - // if(WrapMysqlExpression(item, ¬_a_table_column) == - // WrapStatus::SUCCESS) - return QueryRouteTo::kToTianmu; - return QueryRouteTo::kToMySQL; - default: - // if(WrapMysqlExpression(item, ¬_a_table_column) == - // WrapStatus::SUCCESS) - return QueryRouteTo::kToTianmu; - return QueryRouteTo::kToMySQL; - }; - - /* - * By MW. Related to bug 1073. - * - * For views, 'table_alias' must be created as a concatenation - * of original view(s) name(s) and the table name - * - currently it's just the table name, which leads to ambiguity and errors - * when the same table is used in another place in the query. - * Note that there can be several nested views. - * - * To retrieve name(s) of the view(s) from which the current 'ifield' comes - * you may try the following expression: - * - * ifield->cached_table->table_list->belong_to_view->alias or - * ...->table_name or - * ifield->cached_table->table_list->referencing_view->alias or - * ...->table_name - * - * Here, 'belong_to_view' and 'referencing_view' are different - * if there are nested views. - * - * Probably ifield->cached_table could be also used to find - * 'database_name', 'table_name' and 'table_path' - * in a simpler way than currently. 
- */ - - Field *f = ifield->result_field; - - ASSERT_MYSQL_STRING(f->table->s->db); - ASSERT_MYSQL_STRING(f->table->s->table_name); - ASSERT_MYSQL_STRING(f->table->s->path); - - table_ptr = f->table; - table_alias = ifield->table_name; - database_name = f->table->s->db.str; - table_name = GetTableName(ifield); - table_path = f->table->s->path.str; - field_name = f->field_name; - field_alias = ifield->item_name.ptr(); - - return QueryRouteTo::kToTianmu; -} - -bool Query::FieldUnmysterify(Item *item, TabID &tab, AttrID &col) { - Item_field *ifield; - if (item->type() == Item_tianmufield::get_tianmuitem_type()) { - ifield = dynamic_cast(item)->OriginalItem(); - if (IsAggregationItem(ifield)) { - Item_sum *is = (Item_sum *)ifield; - if (is->get_arg_count() > 1) - return false; - Item *tmp_item = UnRef(is->get_arg(0)); - if (tmp_item->type() == Item::FIELD_ITEM) - ifield = (Item_field *)tmp_item; - else if (tmp_item->type() == Item_tianmufield::get_tianmuitem_type()) - ifield = dynamic_cast(tmp_item)->OriginalItem(); - else if (tmp_item->type() == Item::FUNC_ITEM) { - Item_tianmufield *tianmui = dynamic_cast(item); - tab.n = tianmui->varID[0].tab; - col.n = tianmui->varID[0].tab; - return true; - } else - return false; - } - } else if (item->type() == Item::SUM_FUNC_ITEM) { // min(k), max(k), count(), avg(k), sum(), - // group_concat() - Item_sum *is = (Item_sum *)item; - if (is->get_arg_count() > 1) { - int dir = 0; - if (((Item_sum *)item)->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { - dir = ((Item_func_group_concat *)item)->direction(); - } - - // only pass 1 group 1 order by case, which is the only case Tianmu - // supported - if (dir == 0 || is->get_arg_count() != 2) - return false; - } - Item *tmp_item = UnRef(is->get_arg(0)); - if (tmp_item->type() == Item::FIELD_ITEM) - ifield = (Item_field *)tmp_item; - else if (tmp_item->type() == Item_tianmufield::get_tianmuitem_type()) - ifield = dynamic_cast(tmp_item)->OriginalItem(); - else - return false; - } else if 
(item->type() == Item::FIELD_ITEM) - ifield = (Item_field *)item; - else - return false; - - if (!ifield->table_name) { - return false; - // union results have no name, but refer to the temp table - // if(field_alias2num.find(TabIDColAlias((*table_alias2index)[ifield->table_name], - // ifield->item_name.ptr())) == field_alias2num.end()) - // return false; - // col.n = - // field_alias2num[TabIDColAlias((*table_alias2index)[ifield->table_name], - // ifield->item_name.ptr())]; - // tab.n = common::NULL_VALUE_32; - // return true; - } - const char *table_name = GetTableName(ifield); - std::string ext_alias = - std::string(table_name ? table_name : "") + std::string(":") + std::string(ifield->table_name); - auto it = table_alias2index_ptr.lower_bound(ext_alias); - auto it_end = table_alias2index_ptr.upper_bound(ext_alias); - if (it == table_alias2index_ptr.end()) - return false; - for (; it != it_end; it++) { - TABLE *mysql_table = it->second.second; - tab = TabID(it->second.first); - if (ifield->field->table != mysql_table) - continue; - - // FIXME: is this correct? 
- if (!mysql_table->pos_in_table_list->is_view_or_derived()) { - // Physical table in FROM - TianmuTable - int field_num; - for (field_num = 0; mysql_table->field[field_num]; field_num++) - if (mysql_table->field[field_num]->field_name == ifield->result_field->field_name) - break; - if (!mysql_table->field[field_num]) - continue; - col = AttrID(field_num); - return true; - } else { - // subselect in FROM - TempTable - if (field_alias2num.find(TabIDColAlias(tab.n, ifield->result_field->field_name)) == field_alias2num.end()) - continue; - col.n = field_alias2num[TabIDColAlias(tab.n, ifield->result_field->field_name)]; - return true; - } - } - return false; -} - -QueryRouteTo Query::AddJoins(List &join, TabID &tmp_table, std::vector &left_tables, - std::vector &right_tables, bool in_subquery, bool &first_table /*= true*/, - bool for_subq_in_where /*false*/, bool use_tmp_when_no_join /*false*/) { - if (!join.elements) { - // Use use_tmp_when_no_join when AddJoins - // The caller decides that the scenario is When join_list has no elements and field has sp - if (use_tmp_when_no_join) { - // The index of the subscript [-1] indicates - // that the first physical table is used as the temporary table. - // The subscript value operation in - // TMP_TABLE is required in combination with Query::Preexecute. - TabID tab(-1); - left_tables.push_back(tab); - cq->TmpTable(tmp_table, tab, for_subq_in_where); - return QueryRouteTo::kToTianmu; - } - - // no tables in table list in this select - return QueryRouteTo::kToMySQL; - } - - // on first call first_table = true. It indicates if it is the first table to - // be added is_left is true iff it is nested left join which needs to be - // flatten (all tables regardless of their join type need to be left-joined) - TABLE_LIST *join_ptr; - List_iterator li(join); - std::vector reversed; - - // if the table list was empty altogether, we wouldn't even enter - // Compilation(...) it must be sth. 
like `select 1 from t1 union select 2` and - // we are in the second select in the union - - reversed.reserve(join.elements); - while ((join_ptr = li++) != nullptr) reversed.push_back(join_ptr); - size_t size = reversed.size(); - for (unsigned int i = 0; i < size; i++) { - join_ptr = reversed[size - i - 1]; - if (join_ptr->nested_join) { - std::vector local_left, local_right; - if (QueryRouteTo::kToMySQL == AddJoins(join_ptr->nested_join->join_list, tmp_table, local_left, local_right, - in_subquery, first_table, for_subq_in_where)) - return QueryRouteTo::kToMySQL; - JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); - CondID cond_id; - if (QueryRouteTo::kToMySQL == BuildCondsIfPossible(join_ptr->join_cond(), cond_id, tmp_table, join_type)) - return QueryRouteTo::kToMySQL; - left_tables.insert(left_tables.end(), right_tables.begin(), right_tables.end()); - local_left.insert(local_left.end(), local_right.begin(), local_right.end()); - if (join_ptr->outer_join) - right_tables = local_left; - else - left_tables.insert(left_tables.end(), local_left.begin(), local_left.end()); - if (join_ptr->join_cond() && join_ptr->outer_join) { - cq->LeftJoinOn(tmp_table, left_tables, right_tables, cond_id); - left_tables.insert(left_tables.end(), right_tables.begin(), right_tables.end()); - right_tables.clear(); - } else if (join_ptr->join_cond() && !join_ptr->outer_join) - cq->InnerJoinOn(tmp_table, left_tables, right_tables, cond_id); - } else { - DEBUG_ASSERT(join_ptr->table && "We require that the table is defined if it is not a nested join"); - const char *database_name = 0; - const char *table_name = 0; - const char *table_alias = 0; - const char *table_path = 0; - TabID tab(0); - if (join_ptr->is_view_or_derived()) { - if (QueryRouteTo::kToMySQL == - Compile(cq, join_ptr->derived_unit()->first_select(), join_ptr->derived_unit()->union_distinct, &tab)) - return QueryRouteTo::kToMySQL; - table_alias = join_ptr->alias; - } else { - if 
(QueryRouteTo::kToMySQL == TableUnmysterify(join_ptr, database_name, table_name, table_alias, table_path)) - return QueryRouteTo::kToMySQL; - int tab_num = path2num[table_path]; // number of a table on a list in - // `this` QUERY object - int id = t[tab_num]->GetID(); - cq->TableAlias(tab, TabID(tab_num), table_name, id); - } - std::string ext_alias = std::string(table_name ? table_name : "") + std::string(":") + std::string(table_alias); - table_alias2index_ptr.insert(std::make_pair(ext_alias, std::make_pair(tab.n, join_ptr->table))); - if (first_table) { - left_tables.push_back(tab); - DEBUG_ASSERT(!join_ptr->join_cond() && - "It is not possible to join the first table with the LEFT " - "direction"); - cq->TmpTable(tmp_table, tab, for_subq_in_where); - first_table = false; - } else { - cq->Join(tmp_table, tab); - JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); - // if(join_type == JoinType::JO_LEFT && join_ptr->join_cond() && - // dynamic_cast(join_ptr->join_cond())) - // return QueryRouteTo::kToMySQL; - CondID cond_id; - if (QueryRouteTo::kToMySQL == BuildCondsIfPossible(join_ptr->join_cond(), cond_id, tmp_table, join_type)) - return QueryRouteTo::kToMySQL; - if (join_ptr->join_cond() && join_ptr->outer_join) { - right_tables.push_back(tab); - cq->LeftJoinOn(tmp_table, left_tables, right_tables, cond_id); - left_tables.push_back(tab); - right_tables.clear(); - } else if (join_ptr->join_cond() && !join_ptr->outer_join) { - right_tables.push_back(tab); - cq->InnerJoinOn(tmp_table, left_tables, right_tables, cond_id); - left_tables.push_back(tab); - right_tables.clear(); - } else - left_tables.push_back(tab); - // if(join_ptr->on_expr) - // cq->SetLOJOuterDim(tmp_table, tab, i); - } - } - } - return QueryRouteTo::kToTianmu; -} - -QueryRouteTo Query::AddFields(List &fields, TabID const &tmp_table, TabID const &base_table, - bool const group_by_clause, int &num_of_added_fields, bool ignore_minmax, - bool &aggregation_used) { 
- List_iterator_fast li(fields); - Item *item; - int added = 0; - item = li++; - while (item) { - WrapStatus ws; - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(item, oper, distinct, group_by_clause)) - return QueryRouteTo::kToMySQL; - - if (IsAggregationItem(item)) - aggregation_used = true; - - // in case of transformed subquery sometimes we need to revert back - // transformation to MIN/MAX - if (ignore_minmax && (oper == common::ColOperation::MIN || oper == common::ColOperation::MAX)) - oper = common::ColOperation::LISTING; - - // select PHYSICAL COLUMN or AGGREGATION over PHYSICAL COLUMN - if ((IsFieldItem(item) || IsAggregationOverFieldItem(item)) && - (IsLocalColumn(item, tmp_table) || (!base_table.IsNullID() && IsLocalColumn(item, base_table)))) - AddColumnForPhysColumn(item, tmp_table, base_table, oper, distinct, false, item->item_name.ptr()); - // REF to FIELD_ITEM - else if (item->type() == Item::REF_ITEM) { - item = UnRef(item); - continue; - } - // if ((UnRef(item)->type() == Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM || - // UnRef(item)->type() == Item_tianmufield::FIELD_ITEM) && - // IsLocalColumn(UnRef(item), tmp_table)) - // AddColumnForPhysColumn(UnRef(item), tmp_table, oper, distinct, false, false); - // else { - // // - // } - else if (IsAggregationItem(item) && (((Item_sum *)item)->get_arg(0))->type() == Item::REF_ITEM && - (UnRef(((Item_sum *)item)->get_arg(0))->type() == Item_tianmufield::get_tianmuitem_type() || - (UnRef(((Item_sum *)item)->get_arg(0))->type() == Item_tianmufield::FIELD_ITEM)) && - IsLocalColumn(UnRef(((Item_sum *)item)->get_arg(0)), tmp_table)) - // AGGR on REF to FIELD_ITEM - AddColumnForPhysColumn(UnRef(((Item_sum *)item)->get_arg(0)), tmp_table, TabID(), oper, distinct, false, - item->item_name.ptr()); - else if (IsAggregationItem(item)) { - // select AGGREGATION over EXPRESSION - Item_sum *item_sum = (Item_sum *)item; - if (item_sum->get_arg_count() > 1 || 
HasAggregation(item_sum->get_arg(0))) - return QueryRouteTo::kToMySQL; - if (IsCountStar(item_sum)) { // count(*) doesn't need any virtual column - AttrID at; - cq->AddColumn(at, tmp_table, CQTerm(), oper, item_sum->item_name.ptr(), false); - field_alias2num[TabIDColAlias(tmp_table.n, item_sum->item_name.ptr())] = at.n; - } else { - MysqlExpression *expr; - ws = WrapMysqlExpression(item_sum->get_arg(0), tmp_table, expr, false, false); - if (ws == WrapStatus::FAILURE) - return QueryRouteTo::kToMySQL; - AddColumnForMysqlExpression(expr, tmp_table, - ignore_minmax ? item_sum->get_arg(0)->item_name.ptr() : item_sum->item_name.ptr(), - oper, distinct); - } - } else if (item->type() == Item::SUBSELECT_ITEM) { - CQTerm term; - AttrID at; - if (Item2CQTerm(item, term, tmp_table, - /*group_by_clause ? HAVING_FILTER :*/ CondType::WHERE_COND) == QueryRouteTo::kToMySQL) - return QueryRouteTo::kToMySQL; - cq->AddColumn(at, tmp_table, term, common::ColOperation::LISTING, item->item_name.ptr(), distinct); - field_alias2num[TabIDColAlias(tmp_table.n, item->item_name.ptr())] = at.n; - } else { - // select EXPRESSION - if (HasAggregation(item)) { - oper = common::ColOperation::DELAYED; - aggregation_used = true; - } - MysqlExpression *expr(nullptr); - ws = WrapMysqlExpression(item, tmp_table, expr, false, oper == common::ColOperation::DELAYED); - if (ws == WrapStatus::FAILURE) - return QueryRouteTo::kToMySQL; - if (!item->item_name.ptr()) { - Item_func_conv_charset *item_conv = dynamic_cast(item); - if (item_conv) { - Item **ifunc_args = item_conv->arguments(); - AddColumnForMysqlExpression(expr, tmp_table, ifunc_args[0]->item_name.ptr(), oper, distinct); - } else { - AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), oper, distinct); - } - } else - AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), oper, distinct); - } - added++; - item = li++; - } - num_of_added_fields = added; - return QueryRouteTo::kToTianmu; -} - -// todo(dfx): handle more 
query scenarios -QueryRouteTo Query::AddSemiJoinFiled(List &fields, List &join, const TabID &tmp_table) { - List_iterator_fast field_li(fields); - Item *item; - item = field_li++; - while (item) { - WrapStatus ws; - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(item, oper, distinct, 0)) - return QueryRouteTo::kToMySQL; - if (!IsFieldItem(item)) { - item = field_li++; - continue; - } - AddColumnForPhysColumn(item, tmp_table, TabID(), oper, distinct, false, item->item_name.ptr()); - item = field_li++; - } - - TABLE_LIST *join_ptr; - List_iterator li(join); - std::vector reversed; - while ((join_ptr = li++) != nullptr) { - reversed.push_back(join_ptr); - } - size_t size = reversed.size(); - for (unsigned int i = 0; i < size; i++) { - join_ptr = reversed[size - i - 1]; - if (join_ptr->nested_join) { - List_iterator_fast outer_field_li(join_ptr->nested_join->sj_outer_exprs); - Item *outer_item; - outer_item = outer_field_li++; - while (outer_item) { - WrapStatus ws; - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(outer_item, oper, distinct, 0)) - return QueryRouteTo::kToMySQL; - if (!IsFieldItem(outer_item)) { - outer_item = outer_field_li++; - continue; - } - AddColumnForPhysColumn(outer_item, tmp_table, TabID(), oper, distinct, false, outer_item->item_name.ptr()); - outer_item = outer_field_li++; - } - } - } - return QueryRouteTo::kToTianmu; -} - -QueryRouteTo Query::AddGroupByFields(ORDER *group_by, const TabID &tmp_table, const TabID &base_table) { - for (; group_by; group_by = group_by->next) { - if (group_by->direction != ORDER::ORDER_ASC) { - my_message(ER_SYNTAX_ERROR, - "Tianmu specific error: Using DESC after GROUP BY clause not " - "allowed. 
Use " - "ORDER BY to order the result", - MYF(0)); - throw ReturnMeToMySQLWithError(); - } - - Item *item = *(group_by->item); - item = UnRef(item); - // group by PHYSICAL COLUMN - if ((IsFieldItem(item) || (IsAggregationItem(item) && IsFieldItem(((Item_sum *)item)->get_arg(0)))) && - (IsLocalColumn(item, tmp_table) || (!base_table.IsNullID() && IsLocalColumn(item, base_table)))) { - AddColumnForPhysColumn(item, tmp_table, base_table, common::ColOperation::GROUP_BY, false, true); - } else if (item->type() == Item::SUBSELECT_ITEM) { - CQTerm term; - AttrID at; - if (Item2CQTerm(item, term, tmp_table, CondType::WHERE_COND) == QueryRouteTo::kToMySQL) - return QueryRouteTo::kToMySQL; - cq->AddColumn(at, tmp_table, term, common::ColOperation::GROUP_BY, 0); - // field_alias2num[TabIDColAlias(tmp_table.n, - // item->item_name.ptr())] = - // at.n; - } else { // group by COMPLEX EXPRESSION - MysqlExpression *expr = 0; - if (WrapStatus::FAILURE == WrapMysqlExpression(item, tmp_table, expr, true, true)) - return QueryRouteTo::kToMySQL; - AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), common::ColOperation::GROUP_BY, false, true); - } - } - return QueryRouteTo::kToTianmu; -} - -QueryRouteTo Query::AddOrderByFields(ORDER *order_by, TabID const &tmp_table, TabID const &base_table, - int const group_by_clause) { - for (; order_by; order_by = order_by->next) { - std::pair vc; - Item *item = *(order_by->item); - CQTerm my_term; - QueryRouteTo result{QueryRouteTo::kToMySQL}; - // at first we need to check if we don't have non-deterministic expression - // (e.g., rand()) in such case we should order by output column in TempTable - if (!IsFieldItem(item) && !IsAggregationItem(item) && !IsDeterministic(item) && - item->type() != Item::SUBSELECT_ITEM) { - MysqlExpression *expr = nullptr; - WrapStatus ws = WrapMysqlExpression(item, tmp_table, expr, false, false); - if (ws == WrapStatus::FAILURE) - return QueryRouteTo::kToMySQL; - 
DEBUG_ASSERT(!expr->IsDeterministic()); - int col_num = AddColumnForMysqlExpression(expr, tmp_table, nullptr, common::ColOperation::LISTING, false, true); - vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1)); - if (vc.first == common::NULL_VALUE_32) { - vc.first = tmp_table.n; - cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, AttrID(col_num)); - phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, -col_num - 1), vc)); - } - cq->Add_Order(tmp_table, AttrID(vc.second), (order_by->direction != ORDER::ORDER_ASC)); - continue; - } - if (group_by_clause) { - if (item->type() == Item::FUNC_ITEM) { - MysqlExpression *expr = nullptr; - bool delayed = false; - if (HasAggregation(item)) { - delayed = true; - } - - WrapStatus ws = WrapMysqlExpression(item, tmp_table, expr, false, delayed); - if (ws == WrapStatus::FAILURE) - return QueryRouteTo::kToMySQL; - DEBUG_ASSERT(expr->IsDeterministic()); - int col_num = AddColumnForMysqlExpression( - expr, tmp_table, nullptr, delayed ? 
common::ColOperation::DELAYED : common::ColOperation::LISTING, false, - true); - vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1)); - if (vc.first == common::NULL_VALUE_32) { - vc.first = tmp_table.n; - cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, AttrID(col_num)); - phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, -col_num - 1), vc)); - } - cq->Add_Order(tmp_table, AttrID(vc.second), (order_by->direction != ORDER::ORDER_ASC)); - continue; - // we can reuse transformation done in case of HAVING - // result = Item2CQTerm(item, my_term, tmp_table, CondType::HAVING_COND); - } else { - AttrID at; - result = Item2CQTerm(item, my_term, tmp_table, CondType::HAVING_COND, false, nullptr, nullptr, base_table); - if (item->type() == Item::SUBSELECT_ITEM) { - // create a materialized column with subsel results for the ordering - cq->AddColumn(at, tmp_table, my_term, common::ColOperation::DELAYED, nullptr, false); - vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, at); - if (vc.first == common::NULL_VALUE_32) { - vc.first = tmp_table.n; - cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, at); - phys2virt.insert(std::make_pair(std::pair(tmp_table.n, at.n), vc)); - } - } else // a naked column - vc.second = my_term.vc_id; - // cq->Add_Order(tmp_table, AttrID(vc.second), !(order_by->asc)); - } - - } else { - result = Item2CQTerm(item, my_term, tmp_table, CondType::WHERE_COND); - vc.second = my_term.vc_id; - } - if (result != QueryRouteTo::kToTianmu) - return QueryRouteTo::kToMySQL; - cq->Add_Order(tmp_table, AttrID(vc.second), order_by->direction != ORDER::ORDER_ASC); - } - return QueryRouteTo::kToTianmu; -} - -QueryRouteTo Query::AddGlobalOrderByFields(SQL_I_List *global_order, const TabID &tmp_table, int max_col) { - if (!global_order) - return QueryRouteTo::kToTianmu; - - ORDER *order_by; - for (uint i = 0; i < global_order->elements; i++) { - order_by = (i == 0 ? 
(ORDER *)(global_order->first) : order_by->next); - // the way to traverse 'global_order' list maybe is not very orthodox, but - // it works - - if (order_by == nullptr) - return QueryRouteTo::kToMySQL; - - int col_num = common::NULL_VALUE_32; - if ((*(order_by->item))->type() == Item::INT_ITEM) { - col_num = int((*(order_by->item))->val_int()); - if (col_num < 1 || col_num > max_col) - return QueryRouteTo::kToMySQL; - col_num--; - col_num = -col_num - 1; // make it negative as are columns in TempTable - } else { - Item *item = *(order_by->item); - if (!item->item_name.ptr()) - return QueryRouteTo::kToMySQL; - bool found = false; - for (auto &it : field_alias2num) { - if (tmp_table.n == it.first.first && strcasecmp(it.first.second.c_str(), item->item_name.ptr()) == 0) { - col_num = it.second; - found = true; - break; - } - } - if (!found) - return QueryRouteTo::kToMySQL; - } - int attr; - cq->CreateVirtualColumn(attr, tmp_table, tmp_table, AttrID(col_num)); - phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, col_num), std::make_pair(tmp_table.n, attr))); - cq->Add_Order(tmp_table, AttrID(attr), order_by->direction != ORDER::ORDER_ASC); - } - - return QueryRouteTo::kToTianmu; -} - -Query::WrapStatus Query::WrapMysqlExpression(Item *item, const TabID &tmp_table, MysqlExpression *&expr, bool in_where, - bool aggr_used) { - // Check if the expression doesn't contain any strange items that we don't - // want to see. By the way, collect references to all Item_field objects. 
- std::set ifields; - MysqlExpression::Item2VarID item2varid; - if (!MysqlExpression::SanityAggregationCheck(item, ifields)) - return WrapStatus::FAILURE; - - // this large "if" can be removed to use common code, but many small "ifs" - // must be created then - if (in_where) { - // create a map: [Item_field pointer] -> VarID - for (auto &it : ifields) { - if (IsAggregationItem(it)) { - // a few checkings for aggregations - Item_sum *aggregation = (Item_sum *)it; - if (aggregation->get_arg_count() > 1) - return WrapStatus::FAILURE; - if (IsCountStar(aggregation)) // count(*) doesn't need any virtual column - return WrapStatus::FAILURE; - } - AttrID col, at; - TabID tab; - // find [tab] and [col] which identify column in TIANMU - if (!FieldUnmysterify(it, tab, col)) - return WrapStatus::FAILURE; - if (!cq->ExistsInTempTable(tab, tmp_table)) { - bool is_group_by; - TabID params_table = cq->FindSourceOfParameter(tab, tmp_table, is_group_by); - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) - return WrapStatus::FAILURE; - if (is_group_by && !IsParameterFromWhere(params_table)) { - col.n = AddColumnForPhysColumn(it, params_table, TabID(), oper, distinct, true); - item2varid[it] = VarID(params_table.n, col.n); - } else - item2varid[it] = VarID(tab.n, col.n); - } else { - // aggregation in WHERE not possible unless it is a parameter - DEBUG_ASSERT(!IsAggregationItem(it)); - item2varid[it] = VarID(tab.n, col.n); - } - } - } else { // !in_where - WrapStatus ws; - AttrID at, vc; - for (auto &it : ifields) { - if (IsAggregationItem(it)) { - Item_sum *aggregation = (Item_sum *)it; - if (aggregation->get_arg_count() > 1) - return WrapStatus::FAILURE; - - if (IsCountStar(aggregation)) { // count(*) doesn't need any virtual column - at.n = GetAddColumnId(AttrID(common::NULL_VALUE_32), tmp_table, common::ColOperation::COUNT, false); - if (at.n == common::NULL_VALUE_32) // doesn't exist yet - 
cq->AddColumn(at, tmp_table, CQTerm(), common::ColOperation::COUNT, nullptr, false); - } else { - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(aggregation, oper, distinct, true)) - return WrapStatus::FAILURE; - AttrID col; - TabID tab; - if (IsFieldItem(aggregation->get_arg(0)) && FieldUnmysterify(aggregation, tab, col) && - cq->ExistsInTempTable(tab, tmp_table)) { - // PHYSICAL COLUMN - at.n = AddColumnForPhysColumn(aggregation->get_arg(0), tmp_table, TabID(), oper, distinct, true); - } else { - // EXPRESSION - ws = WrapMysqlExpression(aggregation->get_arg(0), tmp_table, expr, in_where, false); - if (ws == WrapStatus::FAILURE) - return ws; - at.n = AddColumnForMysqlExpression(expr, tmp_table, aggregation->item_name.ptr(), oper, distinct, true); - } - } - item2varid[it] = VarID(tmp_table.n, at.n); - } else if (IsFieldItem(it)) { - AttrID col; - TabID tab; - if (!FieldUnmysterify(it, tab, col)) - return WrapStatus::FAILURE; - if (!cq->ExistsInTempTable(tab, tmp_table)) { - bool is_group_by; - TabID params_table = cq->FindSourceOfParameter(tab, tmp_table, is_group_by); - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) - return WrapStatus::FAILURE; - if (is_group_by && !IsParameterFromWhere(params_table)) { - col.n = AddColumnForPhysColumn(it, params_table, TabID(), oper, distinct, true); - item2varid[it] = VarID(params_table.n, col.n); - } else - item2varid[it] = VarID(tab.n, col.n); - } else if (aggr_used) { - common::ColOperation oper; - bool distinct; - if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) - return WrapStatus::FAILURE; - at.n = AddColumnForPhysColumn(it, tmp_table, TabID(), oper, distinct, true); - item2varid[it] = VarID(tmp_table.n, at.n); - } else { - item2varid[it] = VarID(tab.n, col.n); - } - } else - DEBUG_ASSERT(0); // unknown item type? 
- } - } - gc_expressions.push_back(expr = new MysqlExpression(item, item2varid)); - return WrapStatus::SUCCESS; -} - -int Query::AddColumnForPhysColumn(Item *item, const TabID &tmp_table, TabID const &base_table, - const common::ColOperation oper, const bool distinct, bool group_by, - const char *alias) { - std::pair vc; - AttrID col, at; - TabID tab; - if (!FieldUnmysterify(item, tab, col)) - return common::NULL_VALUE_32; - if (tab.n == common::NULL_VALUE_32) - tab = tmp_table; // table name not contained in item - must be the result - // temp_table - - if (base_table.IsNullID()) { - DEBUG_ASSERT(cq->ExistsInTempTable(tab, tmp_table)); - if (item->type() == Item_tianmufield::get_tianmuitem_type() && - IsAggregationItem(dynamic_cast(item)->OriginalItem())) { - return ((Item_tianmufield *)item)->varID[0].col; - } - vc = VirtualColumnAlreadyExists(tmp_table, tab, col); - if (vc.first == common::NULL_VALUE_32) { - vc.first = tmp_table.n; - cq->CreateVirtualColumn(vc.second, tmp_table, tab, col); - phys2virt.insert(std::make_pair(std::make_pair(tab.n, col.n), vc)); - } else { - int attr = GetAddColumnId(AttrID(vc.second), tmp_table, oper, distinct); - if (attr != common::NULL_VALUE_32) { - if (group_by) // do not add column - not needed duplicate - return attr; - // vc.n = col_to_vc[attr]; - } else if (group_by && oper == common::ColOperation::GROUP_BY && - (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::LISTING, distinct)) != - common::NULL_VALUE_32) { - // modify existing column - CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); - return attr; - } else if (group_by && oper == common::ColOperation::LISTING && - (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::GROUP_BY, distinct)) != - common::NULL_VALUE_32) { - // don;t add unnecessary column to select list - return attr; - } - } - } else { - std::pair phys_vc = VirtualColumnAlreadyExists(base_table, tab, col); - if (phys_vc.first == common::NULL_VALUE_32) { - 
return common::NULL_VALUE_32; - } - vc = VirtualColumnAlreadyExists(tmp_table, TabID(phys_vc.first), AttrID(phys_vc.first)); - if (vc.first == common::NULL_VALUE_32) { - vc.first = tmp_table.n; - int at_id = field_alias2num[TabIDColAlias(base_table.n, item->item_name.ptr())]; - cq->CreateVirtualColumn(vc.second, tmp_table, TabID(phys_vc.first), AttrID(at_id)); - phys2virt.insert(std::make_pair(std::make_pair(phys_vc.first, at_id), vc)); - } else { - int attr = GetAddColumnId(AttrID(vc.second), tmp_table, oper, distinct); - if (attr != common::NULL_VALUE_32) { - if (group_by) // do not add column - not needed duplicate - return attr; - } else if (group_by && oper == common::ColOperation::GROUP_BY && - (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::LISTING, distinct)) != - common::NULL_VALUE_32) { - CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); - return attr; - } else if (group_by && oper == common::ColOperation::LISTING && - (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::GROUP_BY, distinct)) != - common::NULL_VALUE_32) { - return attr; - } - } - } - - if (!item->item_name.ptr() && item->type() == Item::SUM_FUNC_ITEM) { - cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, - group_by ? nullptr : ((Item_field *)(((Item_sum *)item)->get_arg(0)))->item_name.ptr(), distinct); - } else { - if (item->type() == Item::SUM_FUNC_ITEM && ((Item_sum *)item)->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { - // pass the seprator to construct the special instruction - char *ptr = ((Item_func_group_concat *)item)->get_separator()->c_ptr(); - SpecialInstruction si; - si.separator.assign(ptr, std::strlen(ptr)); - si.order = ((Item_func_group_concat *)item)->direction(); - cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, group_by ? nullptr : item->item_name.ptr(), distinct, &si); - } else { - cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, group_by ? 
nullptr : item->item_name.ptr(), distinct); - } - } - if (!group_by && item->item_name.ptr()) - field_alias2num[TabIDColAlias(tmp_table.n, alias ? alias : item->item_name.ptr())] = at.n; - return at.n; -} - -int Query::AddColumnForMysqlExpression(MysqlExpression *mysql_expression, const TabID &tmp_table, const char *alias, - const common::ColOperation oper, const bool distinct, - bool group_by /*= false*/) { - AttrID at, vc; - vc.n = VirtualColumnAlreadyExists(tmp_table, mysql_expression); - if (vc.n == common::NULL_VALUE_32) { - cq->CreateVirtualColumn(vc, tmp_table, mysql_expression, - (oper == common::ColOperation::DELAYED ? tmp_table : TabID(common::NULL_VALUE_32))); - tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, mysql_expression))); - } else { - mysql_expression->RemoveUnusedVarID(); - int attr = GetAddColumnId(vc, tmp_table, oper, distinct); - if (attr != common::NULL_VALUE_32) { - if (group_by) // do not add column - not needed duplicate - return attr; - // vc.n = col_to_vc[attr]; - } else if (group_by && oper == common::ColOperation::GROUP_BY && - (attr = GetAddColumnId(vc, tmp_table, common::ColOperation::LISTING, distinct)) != - common::NULL_VALUE_32) { - // modify existing column - CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); - return attr; - } else if (group_by && oper == common::ColOperation::LISTING && - (attr = GetAddColumnId(vc, tmp_table, common::ColOperation::GROUP_BY, distinct)) != - common::NULL_VALUE_32) { - // don;t add unnecessary column to select list - return attr; - } - } - - // if (parametrized) - // cq->AddColumn(at, tmp_table, CQTerm(vc.n), DELAYED, group_by ? nullptr : - // alias, distinct); - // else - cq->AddColumn(at, tmp_table, CQTerm(vc.n), oper, group_by ? 
nullptr : alias, distinct); - if (!group_by && alias) - field_alias2num[TabIDColAlias(tmp_table.n, alias)] = at.n; - return at.n; -} - -bool Query::IsLocalColumn(Item *item, const TabID &tmp_table) { - DEBUG_ASSERT(IsFieldItem(item) || IsAggregationItem(item)); - AttrID col; - TabID tab; - if (!FieldUnmysterify(item, tab, col)) - return false; - return cq->ExistsInTempTable(tab, tmp_table); -} - -QueryRouteTo Query::Compile(CompiledQuery *compiled_query, SELECT_LEX *selects_list, SELECT_LEX *last_distinct, - TabID *res_tab, bool ignore_limit, Item *left_expr_for_subselect, - common::Operator *oper_for_subselect, bool ignore_minmax, bool for_subq_in_where) { - MEASURE_FET("Query::Compile(...)"); - // at this point all tables are in RCBase engine, so we can proceed with the - // query - - /*Item_func - | - --Item_int_func <- arguments are kept in an array accessible through arguments() - | - --Item_bool_func - | | - | ---Item_cond <- arguments are kept in a list accessible through argument_list() - | | | - | | ---Item_cond_and <- when negated OR of negated items is created - | | | - | | ---Item_cond_or <- when negated AND of negated items is created - | | | - | | ---Item_cond_xor - | | - | ---Item_equal <- arguments are kept in a list accessible through argument_list() - | | + const_item (accessible through get_const() ) - | | (multiple equality) - | | - | ---Item_func_not - | | (???) - | | - | ---Item func_isnull <- when negated IS NOT NULL is created - | - --Item_func_opt_neg <- arguments are kept in an array accessible through arguments(), if negated - | | this information is kept additionally (in a field named 'negated') - | | - | | - | ---Item_func_in - | | - | | - | ---Item_func_between - | - | - --Item_bool_func2 - | - | - ---Item_bool_rowready_func2 <-arguments are kept in an array accessible through arguments(), if negated - | an object of a corresponding class is created - | (e.q. 
~Item_func_lt => Item_func_ge) - | - ----Item_func_eq - | - | - ----Item_func_ne - | - | - ----Item_func_ge - | - | - ----Item_func_le - | - | - ----Item_func_gt - | - | - ----Item_func_lt - | - | - ----Item_func_equal <- This is mystery so far - There are 3 equality functions: - Item_equal -> multiple equality (many fields and optional additional constant value) - Item_func_equal -> ??? - Item_func_eq -> pairwise equality - */ - - bool union_all = (last_distinct == nullptr); - TabID prev_result; - - SQL_I_List *global_order = nullptr; - int col_count = 0; - int64_t global_limit_value = -1; - int64_t global_offset_value = -1; - - // local copy of current cq, to be restored on exit - CompiledQuery *saved_cq = cq; - cq = compiled_query; - - if ((selects_list->join) && - (selects_list != selects_list->join->unit->global_parameters())) { // only in case of unions this is set - SetLimit(selects_list->join->unit->global_parameters(), 0, global_offset_value, (int64_t &)global_limit_value); - global_order = &(selects_list->join->unit->global_parameters()->order_list); - } - - for (SELECT_LEX *sl = selects_list; sl; sl = sl->next_select()) { - int64_t limit_value = -1; - int64_t offset_value = -1; - /* - Increase the identification of whether to create a JOIN object, - which is used to release the JOIN object later. - See #669 for the problems solved. - */ - bool ifNewJoinForTianmu = false; - if (!sl->join) { - sl->add_active_options(SELECT_NO_UNLOCK); - JOIN *join = new JOIN(sl->master_unit()->thd, sl); - - if (!join) { - sl->cleanup(0); - return QueryRouteTo::kToTianmu; - } - ifNewJoinForTianmu = true; - sl->set_join(join); - } - - if (QueryRouteTo::kToMySQL == JudgeErrors(sl)) - return QueryRouteTo::kToMySQL; - - SetLimit(sl, sl == selects_list ? 0 : sl->join->unit->global_parameters(), offset_value, limit_value); - List *fields = &sl->fields_list; - - Item *conds = (ifNewJoinForTianmu || !sl->join->where_cond) ? 
sl->where_cond() : sl->join->where_cond; - - ORDER *order = sl->order_list.first; - - // if (order) global_order = 0; //we want to zero global order (which - // seems to be always present) if we find a local order by clause - // The above is not necessary since global_order is set only in case of - // real UNIONs - - ORDER *group = sl->group_list.first; - Item *having = sl->having_cond(); - List *join_list = sl->join_list; - bool zero_result = sl->join->zero_result_cause != nullptr; - - // The exists subquery determines whether a value exists during the query optimization phase - // result is not set to zero only when a matching value is found in the query optimization phase - // When a field has an index, the optimization phase scans the table through the index - // The primary key implementation of the current column storage engine - // has a problem with the primary key index to scan the table for data - // Remove the following temporary practices after primary key indexing is complete - if (zero_result) { - if (Item::Type::SUBSELECT_ITEM == (conds->type())) { - zero_result = false; - } else { - Item_cond *item_cond = dynamic_cast(conds); - if (item_cond) { - List_iterator_fast li(*item_cond->argument_list()); - Item *item; - while ((item = li++)) { - if (item && Item::Type::SUBSELECT_ITEM == (item->type())) { - zero_result = false; - break; - } - } - } - } - } - - // When join_list has no elements and field has sp, tmp table is used and de-duplicated - // Use use_tmp_when_no_join when AddJoins - bool use_tmp_when_no_join = false; - if (!join_list->elements) { - List_iterator_fast li(*fields); - for (Item *item = li++; item; item = li++) { - if ((item->type() == Item::Type::FUNC_ITEM) && - ((down_cast(item)->functype() == Item_func::Functype::FUNC_SP) || - (down_cast(item)->functype() == Item_func::Functype::SUSERVAR_FUNC)) && - (!sl->is_distinct())) { - sl->add_active_options(SELECT_DISTINCT); - sl->join->select_distinct = TRUE; - use_tmp_when_no_join = true; - 
break; - } - } - } - - // partial optimization of LOJ conditions, JOIN::optimize(part=3) - // necessary due to already done basic transformation of conditions - // see comments in sql_select.cc:JOIN::optimize() - if (IsLOJ(join_list) && - ((sl->join->m_select_limit) && - ((sl->join->where_cond) || (sl->join->where_cond && (uint64_t)sl->join->where_cond != 0x01)))) { - sl->join->optimize(OptimizePhase::Finish_LOJ_Transform); - } - - Item *field_for_subselect; - Item *cond_to_reinsert = nullptr; - List *list_to_reinsert = nullptr; - - TabID tmp_table; - try { - if (left_expr_for_subselect) - if (!ClearSubselectTransformation(*oper_for_subselect, field_for_subselect, conds, having, cond_to_reinsert, - list_to_reinsert, left_expr_for_subselect)) - throw CompilationError(); - - if (having && !group) // we cannot handle the case of a having without a group by - throw CompilationError(); - - // handle table list - TABLE_LIST *tables = sl->leaf_tables ? sl->leaf_tables : (TABLE_LIST *)sl->table_list.first; - for (TABLE_LIST *table_ptr = tables; table_ptr; table_ptr = table_ptr->next_leaf) { - if (!table_ptr->is_view_or_derived()) { - if (!Engine::IsTianmuTable(table_ptr->table)) - throw CompilationError(); - std::string path = TablePath(table_ptr); - if (path2num.find(path) == path2num.end()) { - path2num[path] = NumOfTabs(); - AddTable(m_conn->GetTableByPath(path)); - TIANMU_LOG(LogCtl_Level::DEBUG, "add query table: %s", path.c_str()); - } - } - } - - // handle join & join cond - std::vector left_tables, right_tables; - bool first_table = true; - if (QueryRouteTo::kToMySQL == AddJoins(*join_list, tmp_table, left_tables, right_tables, - (res_tab != nullptr && res_tab->n != 0), first_table, for_subq_in_where, - use_tmp_when_no_join)) - throw CompilationError(); - - // handle fields - List field_list_for_subselect; - if (left_expr_for_subselect && field_for_subselect) { - field_list_for_subselect.push_back(field_for_subselect); - fields = &field_list_for_subselect; - } - 
bool aggr_used = false; - if (sl->has_sj_nests && group != nullptr) { - // handle semi-join fields (use on group by) - if (QueryRouteTo::kToMySQL == AddSemiJoinFiled(*fields, *join_list, tmp_table)) - throw CompilationError(); - } else { - // handle normal fields - if (QueryRouteTo::kToMySQL == - AddFields(*fields, tmp_table, TabID(), group != nullptr, col_count, ignore_minmax, aggr_used)) - throw CompilationError(); - if (QueryRouteTo::kToMySQL == AddGroupByFields(group, tmp_table, TabID())) - throw CompilationError(); - bool group_by_clause = group != nullptr || sl->join->select_distinct || aggr_used || sl->has_sj_nests; - if (QueryRouteTo::kToMySQL == AddOrderByFields(order, tmp_table, TabID(), group_by_clause)) - throw CompilationError(); - } - - // handle where cond - CondID cond_id; - if (QueryRouteTo::kToMySQL == BuildConditions(conds, cond_id, cq, tmp_table, CondType::WHERE_COND, zero_result)) - throw CompilationError(); - - cq->AddConds(tmp_table, cond_id, CondType::WHERE_COND); - - // handle having cond - cond_id = CondID(); - if (QueryRouteTo::kToMySQL == BuildConditions(having, cond_id, cq, tmp_table, CondType::HAVING_COND)) - throw CompilationError(); - - cq->AddConds(tmp_table, cond_id, CondType::HAVING_COND); - cq->ApplyConds(tmp_table); - - // handle group by & order by after semi-join - if (sl->has_sj_nests) { - if (group != nullptr) { - cq->Mode(tmp_table, TMParameter::TM_DISTINCT); - TabID new_tmp_table; - cq->TmpTable(new_tmp_table, tmp_table, false); - if (QueryRouteTo::kToMySQL == - AddFields(*fields, new_tmp_table, tmp_table, group != nullptr, col_count, ignore_minmax, aggr_used)) - throw CompilationError(); - if (QueryRouteTo::kToMySQL == AddGroupByFields(group, new_tmp_table, tmp_table)) - throw CompilationError(); - if (QueryRouteTo::kToMySQL == AddOrderByFields(order, new_tmp_table, tmp_table, - group != nullptr || sl->join->select_distinct || aggr_used)) - throw CompilationError(); - tmp_table = new_tmp_table; - } else { - 
cq->Mode(tmp_table, TMParameter::TM_DISTINCT); - } - } - } catch (...) { - // restore original values of class fields (necessary if this method is - // called recursively) - cq = saved_cq; - if (cond_to_reinsert && list_to_reinsert) - list_to_reinsert->push_back(cond_to_reinsert); - if (ifNewJoinForTianmu) - sl->cleanup(true); - return QueryRouteTo::kToMySQL; - } - - if (sl->join->select_distinct) - cq->Mode(tmp_table, TMParameter::TM_DISTINCT); - if (!ignore_limit && limit_value >= 0 && !sl->has_sj_nests) - cq->Mode(tmp_table, TMParameter::TM_TOP, offset_value, limit_value); - - if (sl == selects_list) { - prev_result = tmp_table; - if (global_order && !selects_list->next_select()) { // trivial union with one select and - // ext. order by - tmp_table = TabID(); - cq->Union(prev_result, prev_result, tmp_table, true); - } - } else - cq->Union(prev_result, prev_result, tmp_table, union_all); - if (sl == last_distinct) - union_all = true; - if (cond_to_reinsert && list_to_reinsert) - list_to_reinsert->push_back(cond_to_reinsert); - if (ifNewJoinForTianmu) - sl->cleanup(true); - } - - cq->BuildTableIDStepsMap(); - - if (QueryRouteTo::kToMySQL == AddGlobalOrderByFields(global_order, prev_result, col_count)) - return QueryRouteTo::kToMySQL; - - if (!ignore_limit && global_limit_value >= 0) - cq->Mode(prev_result, TMParameter::TM_TOP, global_offset_value, global_limit_value); - - if (res_tab != nullptr) - *res_tab = prev_result; - else - cq->Result(prev_result); - cq = saved_cq; - return QueryRouteTo::kToTianmu; -} - -JoinType Query::GetJoinTypeAndCheckExpr(uint outer_join, Item *on_expr) { - if (outer_join) - ASSERT(on_expr != 0, "on_expr shouldn't be null when outer_join != 0"); - - JoinType join_type; - - if ((outer_join & JOIN_TYPE_LEFT) && (outer_join & JOIN_TYPE_RIGHT)) - join_type = JoinType::JO_FULL; - else if (outer_join & JOIN_TYPE_LEFT) - join_type = JoinType::JO_LEFT; - else if (outer_join & JOIN_TYPE_RIGHT) - join_type = JoinType::JO_RIGHT; - else - join_type 
= JoinType::JO_INNER; - - return join_type; -} - -bool Query::IsLOJ(List *join) { - TABLE_LIST *join_ptr{nullptr}; - List_iterator li(*join); - while ((join_ptr = li++)) { - JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); - if (join_ptr->join_cond() && (join_type == JoinType::JO_LEFT || join_type == JoinType::JO_RIGHT)) - return true; - } - return false; -} -} // namespace core -} // namespace Tianmu +/* Copyright (c) 2022 StoneAtom, Inc. All rights reserved. + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include + +#include "common/mysql_gate.h" +#include "core/engine.h" +#include "core/mysql_expression.h" +#include "core/query.h" +#include "core/transaction.h" +#include "optimizer/compile/compilation_tools.h" +#include "optimizer/compile/compiled_query.h" + +namespace Tianmu { +namespace core { +QueryRouteTo TableUnmysterify(TABLE_LIST *tab, const char *&database_name, const char *&table_name, + const char *&table_alias, const char *&table_path) { + ASSERT_MYSQL_STRING(tab->table->s->db); + ASSERT_MYSQL_STRING(tab->table->s->table_name); + ASSERT_MYSQL_STRING(tab->table->s->path); + + database_name = tab->table->s->db.str; + if (tab->referencing_view) + table_name = tab->referencing_view->table_name; + else + table_name = tab->table->s->table_name.str; + table_alias = tab->alias; + 
table_path = tab->table->s->path.str; + + return QueryRouteTo::kToTianmu; +} + +QueryRouteTo JudgeErrors(SELECT_LEX *sl) { + if (!sl->join) { + return QueryRouteTo::kToMySQL; + } + + /* gone with mysql5.6 + if(sl->join && sl->join->procedure) { + my_message(ER_SYNTAX_ERROR, "Tianmu specific error: No PROCEDURE + syntax supported", MYF(0)); throw ReturnMeToMySQLWithError(); + } + */ + + if (sl->offset_limit) + if (sl->offset_limit->type() != Item::INT_ITEM /*|| sl->offset_limit->val_int()*/) { + my_message(ER_SYNTAX_ERROR, "Tianmu specific error: Only numerical OFFSET supported", MYF(0)); + throw ReturnMeToMySQLWithError(); + } + + if (sl->select_limit) + if (sl->select_limit->type() != Item::INT_ITEM) { + my_message(ER_SYNTAX_ERROR, "Tianmu specific error: Only numerical LIMIT supported", MYF(0)); + throw ReturnMeToMySQLWithError(); + } + if (sl->olap == ROLLUP_TYPE) { + /*my_message(ER_SYNTAX_ERROR, "Tianmu specific error: WITH ROLLUP not + supported", MYF(0)); throw ReturnMeToMySQLWithError();*/ + return QueryRouteTo::kToMySQL; + } + + return QueryRouteTo::kToTianmu; +} + +void SetLimit(SELECT_LEX *sl, SELECT_LEX *gsl, int64_t &offset_value, int64_t &limit_value) { + if (sl->select_limit && (!gsl || sl->select_limit != gsl->select_limit)) { + limit_value = sl->select_limit->val_int(); + if (limit_value == UINT_MAX) { /* this value seems to be sometimes + automatically set by MYSQL to UINT_MAX*/ + limit_value = -1; // no limit set + offset_value = -1; + }; + }; + + if (limit_value) + offset_value = 0; + + if (sl->offset_limit && (!gsl || sl->offset_limit != gsl->offset_limit)) + offset_value = sl->offset_limit->val_int(); +} + +// Used in Query::Compile() to break compilation in the middle and make cleanup +// before returning +class CompilationError {}; + +QueryRouteTo Query::FieldUnmysterify(Item *item, const char *&database_name, const char *&table_name, + const char *&table_alias, const char *&table_path, const TABLE *&table_ptr, + const char *&field_name, 
const char *&field_alias) { + table_alias = EMPTY_TABLE_CONST_INDICATOR; + database_name = nullptr; + table_name = nullptr; + table_path = nullptr; + table_ptr = nullptr; + field_name = nullptr; + field_alias = nullptr; + + item = UnRef(item); + + Item_field *ifield; + switch (static_cast(item->type())) { + case static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM): + ifield = dynamic_cast(item)->OriginalItem(); + if (IsAggregationItem(ifield)) { + Item_sum *is = (Item_sum *)ifield; + if (is->get_arg_count() > 1) + return QueryRouteTo::kToMySQL; + Item *tmp_item = UnRef(is->get_arg(0)); + if (tmp_item->type() == Item::FIELD_ITEM) + ifield = (Item_field *)tmp_item; + else if (static_cast(tmp_item->type()) == + static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM)) + ifield = dynamic_cast(tmp_item)->OriginalItem(); + else { + return QueryRouteTo::kToMySQL; + } + } + break; + case Item::FIELD_ITEM: // regular select + ifield = (Item_field *)item; + break; + + case Item::SUM_FUNC_ITEM: { // min(k), max(k), count(), avg(k), sum + Item_sum *is = (Item_sum *)item; + if (is->get_arg_count() > 1) { + return QueryRouteTo::kToMySQL; + } + Item *tmp_item = UnRef(is->get_arg(0)); + if (tmp_item->type() == Item::FIELD_ITEM) + ifield = (Item_field *)tmp_item; + else if (static_cast(tmp_item->type()) == + static_cast(Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM)) /* *CAUTION* comparision of + enumerators from different + enums */ + ifield = dynamic_cast(tmp_item)->OriginalItem(); + else + return QueryRouteTo::kToMySQL; + break; + } + case Item::FUNC_ITEM: // complex expressions + // if(WrapMysqlExpression(item, ¬_a_table_column) == + // WrapStatus::SUCCESS) + return QueryRouteTo::kToTianmu; + return QueryRouteTo::kToMySQL; + default: + // if(WrapMysqlExpression(item, ¬_a_table_column) == + // WrapStatus::SUCCESS) + return QueryRouteTo::kToTianmu; + return QueryRouteTo::kToMySQL; + }; + + /* + * By MW. Related to bug 1073. 
+ * + * For views, 'table_alias' must be created as a concatenation + * of original view(s) name(s) and the table name + * - currently it's just the table name, which leads to ambiguity and errors + * when the same table is used in another place in the query. + * Note that there can be several nested views. + * + * To retrieve name(s) of the view(s) from which the current 'ifield' comes + * you may try the following expression: + * + * ifield->cached_table->table_list->belong_to_view->alias or + * ...->table_name or + * ifield->cached_table->table_list->referencing_view->alias or + * ...->table_name + * + * Here, 'belong_to_view' and 'referencing_view' are different + * if there are nested views. + * + * Probably ifield->cached_table could be also used to find + * 'database_name', 'table_name' and 'table_path' + * in a simpler way than currently. + */ + + Field *f = ifield->result_field; + + ASSERT_MYSQL_STRING(f->table->s->db); + ASSERT_MYSQL_STRING(f->table->s->table_name); + ASSERT_MYSQL_STRING(f->table->s->path); + + table_ptr = f->table; + table_alias = ifield->table_name; + database_name = f->table->s->db.str; + table_name = GetTableName(ifield); + table_path = f->table->s->path.str; + field_name = f->field_name; + field_alias = ifield->item_name.ptr(); + + return QueryRouteTo::kToTianmu; +} + +bool Query::FieldUnmysterify(Item *item, TabID &tab, AttrID &col) { + Item_field *ifield; + if (item->type() == Item_tianmufield::get_tianmuitem_type()) { + ifield = dynamic_cast(item)->OriginalItem(); + if (IsAggregationItem(ifield)) { + Item_sum *is = (Item_sum *)ifield; + if (is->get_arg_count() > 1) + return false; + Item *tmp_item = UnRef(is->get_arg(0)); + if (tmp_item->type() == Item::FIELD_ITEM) + ifield = (Item_field *)tmp_item; + else if (tmp_item->type() == Item_tianmufield::get_tianmuitem_type()) + ifield = dynamic_cast(tmp_item)->OriginalItem(); + else if (tmp_item->type() == Item::FUNC_ITEM) { + Item_tianmufield *tianmui = dynamic_cast(item); + tab.n = 
tianmui->varID[0].tab; + col.n = tianmui->varID[0].tab; + return true; + } else + return false; + } + } else if (item->type() == Item::SUM_FUNC_ITEM) { // min(k), max(k), count(), avg(k), sum(), + // group_concat() + Item_sum *is = (Item_sum *)item; + if (is->get_arg_count() > 1) { + int dir = 0; + if (((Item_sum *)item)->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { + dir = ((Item_func_group_concat *)item)->direction(); + } + + // only pass 1 group 1 order by case, which is the only case Tianmu + // supported + if (dir == 0 || is->get_arg_count() != 2) + return false; + } + Item *tmp_item = UnRef(is->get_arg(0)); + if (tmp_item->type() == Item::FIELD_ITEM) + ifield = (Item_field *)tmp_item; + else if (tmp_item->type() == Item_tianmufield::get_tianmuitem_type()) + ifield = dynamic_cast(tmp_item)->OriginalItem(); + else + return false; + } else if (item->type() == Item::FIELD_ITEM) + ifield = (Item_field *)item; + else + return false; + + if (!ifield->table_name) { + return false; + // union results have no name, but refer to the temp table + // if(field_alias2num.find(TabIDColAlias((*table_alias2index)[ifield->table_name], + // ifield->item_name.ptr())) == field_alias2num.end()) + // return false; + // col.n = + // field_alias2num[TabIDColAlias((*table_alias2index)[ifield->table_name], + // ifield->item_name.ptr())]; + // tab.n = common::NULL_VALUE_32; + // return true; + } + const char *table_name = GetTableName(ifield); + std::string ext_alias = + std::string(table_name ? table_name : "") + std::string(":") + std::string(ifield->table_name); + auto it = table_alias2index_ptr.lower_bound(ext_alias); + auto it_end = table_alias2index_ptr.upper_bound(ext_alias); + if (it == table_alias2index_ptr.end()) + return false; + for (; it != it_end; it++) { + TABLE *mysql_table = it->second.second; + tab = TabID(it->second.first); + if (ifield->field->table != mysql_table) + continue; + + // FIXME: is this correct? 
+ if (!mysql_table->pos_in_table_list->is_view_or_derived()) { + // Physical table in FROM - TianmuTable + int field_num; + for (field_num = 0; mysql_table->field[field_num]; field_num++) + if (mysql_table->field[field_num]->field_name == ifield->result_field->field_name) + break; + if (!mysql_table->field[field_num]) + continue; + col = AttrID(field_num); + return true; + } else { + // subselect in FROM - TempTable + if (field_alias2num.find(TabIDColAlias(tab.n, ifield->result_field->field_name)) == field_alias2num.end()) + continue; + col.n = field_alias2num[TabIDColAlias(tab.n, ifield->result_field->field_name)]; + return true; + } + } + return false; +} + +QueryRouteTo Query::AddJoins(List &join, TabID &tmp_table, std::vector &left_tables, + std::vector &right_tables, bool in_subquery, bool &first_table /*= true*/, + bool for_subq_in_where /*false*/, bool use_tmp_when_no_join /*false*/) { + if (!join.elements) { + // Use use_tmp_when_no_join when AddJoins + // The caller decides that the scenario is When join_list has no elements and field has sp + if (use_tmp_when_no_join) { + // The index of the subscript [-1] indicates + // that the first physical table is used as the temporary table. + // The subscript value operation in + // TMP_TABLE is required in combination with Query::Preexecute. + TabID tab(-1); + left_tables.push_back(tab); + cq->TmpTable(tmp_table, tab, TableSubType::NORMAL, for_subq_in_where); + return QueryRouteTo::kToTianmu; + } + + // no tables in table list in this select + return QueryRouteTo::kToMySQL; + } + + // on first call first_table = true. It indicates if it is the first table to + // be added is_left is true iff it is nested left join which needs to be + // flatten (all tables regardless of their join type need to be left-joined) + TABLE_LIST *join_ptr; + List_iterator li(join); + std::vector reversed; + + // if the table list was empty altogether, we wouldn't even enter + // Compilation(...) it must be sth. 
like `select 1 from t1 union select 2` and + // we are in the second select in the union + + reversed.reserve(join.elements); + while ((join_ptr = li++) != nullptr) reversed.push_back(join_ptr); + size_t size = reversed.size(); + for (unsigned int i = 0; i < size; i++) { + join_ptr = reversed[size - i - 1]; + if (join_ptr->nested_join) { + std::vector local_left, local_right; + if (QueryRouteTo::kToMySQL == AddJoins(join_ptr->nested_join->join_list, tmp_table, local_left, local_right, + in_subquery, first_table, for_subq_in_where)) + return QueryRouteTo::kToMySQL; + JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); + CondID cond_id; + if (QueryRouteTo::kToMySQL == BuildCondsIfPossible(join_ptr->join_cond(), cond_id, tmp_table, join_type)) + return QueryRouteTo::kToMySQL; + left_tables.insert(left_tables.end(), right_tables.begin(), right_tables.end()); + local_left.insert(local_left.end(), local_right.begin(), local_right.end()); + if (join_ptr->outer_join) + right_tables = local_left; + else + left_tables.insert(left_tables.end(), local_left.begin(), local_left.end()); + if (join_ptr->join_cond() && join_ptr->outer_join) { + cq->LeftJoinOn(tmp_table, left_tables, right_tables, cond_id); + left_tables.insert(left_tables.end(), right_tables.begin(), right_tables.end()); + right_tables.clear(); + } else if (join_ptr->join_cond() && !join_ptr->outer_join) + cq->InnerJoinOn(tmp_table, left_tables, right_tables, cond_id); + } else { + DEBUG_ASSERT(join_ptr->table && "We require that the table is defined if it is not a nested join"); + const char *database_name = 0; + const char *table_name = 0; + const char *table_alias = 0; + const char *table_path = 0; + TabID tab(0); + if (join_ptr->is_view_or_derived()) { + if (QueryRouteTo::kToMySQL == + Compile(cq, join_ptr->derived_unit()->first_select(), join_ptr->derived_unit()->union_distinct, &tab)) + return QueryRouteTo::kToMySQL; + table_alias = join_ptr->alias; + } else { + if 
(QueryRouteTo::kToMySQL == TableUnmysterify(join_ptr, database_name, table_name, table_alias, table_path)) + return QueryRouteTo::kToMySQL; + int tab_num = path2num[table_path]; // number of a table on a list in + // `this` QUERY object + int id = t[tab_num]->GetID(); + cq->TableAlias(tab, TabID(tab_num), table_name, id); + } + std::string ext_alias = std::string(table_name ? table_name : "") + std::string(":") + std::string(table_alias); + table_alias2index_ptr.insert(std::make_pair(ext_alias, std::make_pair(tab.n, join_ptr->table))); + if (first_table) { + left_tables.push_back(tab); + DEBUG_ASSERT(!join_ptr->join_cond() && + "It is not possible to join the first table with the LEFT " + "direction"); + cq->TmpTable(tmp_table, tab, TableSubType::NORMAL, for_subq_in_where); + first_table = false; + } else { + cq->Join(tmp_table, tab); + JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); + // if(join_type == JoinType::JO_LEFT && join_ptr->join_cond() && + // dynamic_cast(join_ptr->join_cond())) + // return QueryRouteTo::kToMySQL; + CondID cond_id; + if (QueryRouteTo::kToMySQL == BuildCondsIfPossible(join_ptr->join_cond(), cond_id, tmp_table, join_type)) + return QueryRouteTo::kToMySQL; + if (join_ptr->join_cond() && join_ptr->outer_join) { + right_tables.push_back(tab); + cq->LeftJoinOn(tmp_table, left_tables, right_tables, cond_id); + left_tables.push_back(tab); + right_tables.clear(); + } else if (join_ptr->join_cond() && !join_ptr->outer_join) { + right_tables.push_back(tab); + cq->InnerJoinOn(tmp_table, left_tables, right_tables, cond_id); + left_tables.push_back(tab); + right_tables.clear(); + } else + left_tables.push_back(tab); + // if(join_ptr->on_expr) + // cq->SetLOJOuterDim(tmp_table, tab, i); + } + } + } + return QueryRouteTo::kToTianmu; +} + +QueryRouteTo Query::AddFields(List &fields, TabID const &tmp_table, TabID const &base_table, + bool const group_by_clause, int &num_of_added_fields, bool ignore_minmax, + 
bool &aggregation_used) { + List_iterator_fast li(fields); + Item *item; + int added = 0; + item = li++; + while (item) { + WrapStatus ws; + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(item, oper, distinct, group_by_clause)) + return QueryRouteTo::kToMySQL; + + if (IsAggregationItem(item)) + aggregation_used = true; + + // in case of transformed subquery sometimes we need to revert back + // transformation to MIN/MAX + if (ignore_minmax && (oper == common::ColOperation::MIN || oper == common::ColOperation::MAX)) + oper = common::ColOperation::LISTING; + + // select PHYSICAL COLUMN or AGGREGATION over PHYSICAL COLUMN + if ((IsFieldItem(item) || IsAggregationOverFieldItem(item)) && + (IsLocalColumn(item, tmp_table) || (!base_table.IsNullID() && IsLocalColumn(item, base_table)))) + AddColumnForPhysColumn(item, tmp_table, base_table, oper, distinct, false, item->item_name.ptr()); + // REF to FIELD_ITEM + else if (item->type() == Item::REF_ITEM) { + item = UnRef(item); + continue; + } + // if ((UnRef(item)->type() == Item_tianmufield::enumTIANMUFiledItem::TIANMUFIELD_ITEM || + // UnRef(item)->type() == Item_tianmufield::FIELD_ITEM) && + // IsLocalColumn(UnRef(item), tmp_table)) + // AddColumnForPhysColumn(UnRef(item), tmp_table, oper, distinct, false, false); + // else { + // // + // } + else if (IsAggregationItem(item) && (((Item_sum *)item)->get_arg(0))->type() == Item::REF_ITEM && + (UnRef(((Item_sum *)item)->get_arg(0))->type() == Item_tianmufield::get_tianmuitem_type() || + (UnRef(((Item_sum *)item)->get_arg(0))->type() == Item_tianmufield::FIELD_ITEM)) && + IsLocalColumn(UnRef(((Item_sum *)item)->get_arg(0)), tmp_table)) + // AGGR on REF to FIELD_ITEM + AddColumnForPhysColumn(UnRef(((Item_sum *)item)->get_arg(0)), tmp_table, TabID(), oper, distinct, false, + item->item_name.ptr()); + else if (IsAggregationItem(item)) { + // select AGGREGATION over EXPRESSION + Item_sum *item_sum = (Item_sum *)item; + if 
(item_sum->get_arg_count() > 1 || HasAggregation(item_sum->get_arg(0))) + return QueryRouteTo::kToMySQL; + if (IsCountStar(item_sum)) { // count(*) doesn't need any virtual column + AttrID at; + cq->AddColumn(at, tmp_table, CQTerm(), oper, item_sum->item_name.ptr(), false); + field_alias2num[TabIDColAlias(tmp_table.n, item_sum->item_name.ptr())] = at.n; + } else { + MysqlExpression *expr; + ws = WrapMysqlExpression(item_sum->get_arg(0), tmp_table, expr, false, false); + if (ws == WrapStatus::FAILURE) + return QueryRouteTo::kToMySQL; + AddColumnForMysqlExpression(expr, tmp_table, + ignore_minmax ? item_sum->get_arg(0)->item_name.ptr() : item_sum->item_name.ptr(), + oper, distinct); + } + } else if (item->type() == Item::SUBSELECT_ITEM) { + CQTerm term; + AttrID at; + if (Item2CQTerm(item, term, tmp_table, + /*group_by_clause ? HAVING_FILTER :*/ CondType::WHERE_COND) == QueryRouteTo::kToMySQL) + return QueryRouteTo::kToMySQL; + cq->AddColumn(at, tmp_table, term, common::ColOperation::LISTING, item->item_name.ptr(), distinct); + field_alias2num[TabIDColAlias(tmp_table.n, item->item_name.ptr())] = at.n; + } else { + // select EXPRESSION + if (HasAggregation(item)) { + oper = common::ColOperation::DELAYED; + aggregation_used = true; + } + MysqlExpression *expr(nullptr); + ws = WrapMysqlExpression(item, tmp_table, expr, false, oper == common::ColOperation::DELAYED); + if (ws == WrapStatus::FAILURE) + return QueryRouteTo::kToMySQL; + if (!item->item_name.ptr()) { + Item_func_conv_charset *item_conv = dynamic_cast(item); + if (item_conv) { + Item **ifunc_args = item_conv->arguments(); + AddColumnForMysqlExpression(expr, tmp_table, ifunc_args[0]->item_name.ptr(), oper, distinct); + } else { + AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), oper, distinct); + } + } else + AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), oper, distinct); + } + added++; + item = li++; + } + num_of_added_fields = added; + return QueryRouteTo::kToTianmu; +} 
+ +// todo(dfx): handle more query scenarios +QueryRouteTo Query::AddSemiJoinFiled(List &fields, List &join, const TabID &tmp_table) { + List_iterator_fast field_li(fields); + Item *item; + item = field_li++; + while (item) { + WrapStatus ws; + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(item, oper, distinct, 0)) + return QueryRouteTo::kToMySQL; + if (!IsFieldItem(item)) { + item = field_li++; + continue; + } + AddColumnForPhysColumn(item, tmp_table, TabID(), oper, distinct, false, item->item_name.ptr()); + item = field_li++; + } + + TABLE_LIST *join_ptr; + List_iterator li(join); + std::vector reversed; + while ((join_ptr = li++) != nullptr) { + reversed.push_back(join_ptr); + } + size_t size = reversed.size(); + for (unsigned int i = 0; i < size; i++) { + join_ptr = reversed[size - i - 1]; + if (join_ptr->nested_join) { + List_iterator_fast outer_field_li(join_ptr->nested_join->sj_outer_exprs); + Item *outer_item; + outer_item = outer_field_li++; + while (outer_item) { + WrapStatus ws; + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(outer_item, oper, distinct, 0)) + return QueryRouteTo::kToMySQL; + if (!IsFieldItem(outer_item)) { + outer_item = outer_field_li++; + continue; + } + AddColumnForPhysColumn(outer_item, tmp_table, TabID(), oper, distinct, false, outer_item->item_name.ptr()); + outer_item = outer_field_li++; + } + } + } + return QueryRouteTo::kToTianmu; +} + +QueryRouteTo Query::AddGroupByFields(ORDER *group_by, const TabID &tmp_table, const TabID &base_table) { + for (; group_by; group_by = group_by->next) { + if (group_by->direction != ORDER::ORDER_ASC) { + my_message(ER_SYNTAX_ERROR, + "Tianmu specific error: Using DESC after GROUP BY clause not " + "allowed. 
Use " + "ORDER BY to order the result", + MYF(0)); + throw ReturnMeToMySQLWithError(); + } + + Item *item = *(group_by->item); + item = UnRef(item); + // group by PHYSICAL COLUMN + if ((IsFieldItem(item) || (IsAggregationItem(item) && IsFieldItem(((Item_sum *)item)->get_arg(0)))) && + (IsLocalColumn(item, tmp_table) || (!base_table.IsNullID() && IsLocalColumn(item, base_table)))) { + AddColumnForPhysColumn(item, tmp_table, base_table, common::ColOperation::GROUP_BY, false, true); + } else if (item->type() == Item::SUBSELECT_ITEM) { + CQTerm term; + AttrID at; + if (Item2CQTerm(item, term, tmp_table, CondType::WHERE_COND) == QueryRouteTo::kToMySQL) + return QueryRouteTo::kToMySQL; + cq->AddColumn(at, tmp_table, term, common::ColOperation::GROUP_BY, 0); + // field_alias2num[TabIDColAlias(tmp_table.n, + // item->item_name.ptr())] = + // at.n; + } else { // group by COMPLEX EXPRESSION + MysqlExpression *expr = 0; + if (WrapStatus::FAILURE == WrapMysqlExpression(item, tmp_table, expr, true, true)) + return QueryRouteTo::kToMySQL; + AddColumnForMysqlExpression(expr, tmp_table, item->item_name.ptr(), common::ColOperation::GROUP_BY, false, true); + } + } + return QueryRouteTo::kToTianmu; +} + +QueryRouteTo Query::AddOrderByFields(ORDER *order_by, TabID const &tmp_table, TabID const &base_table, + int const group_by_clause) { + for (; order_by; order_by = order_by->next) { + std::pair vc; + Item *item = *(order_by->item); + CQTerm my_term; + QueryRouteTo result{QueryRouteTo::kToMySQL}; + // at first we need to check if we don't have non-deterministic expression + // (e.g., rand()) in such case we should order by output column in TempTable + if (!IsFieldItem(item) && !IsAggregationItem(item) && !IsDeterministic(item) && + item->type() != Item::SUBSELECT_ITEM) { + MysqlExpression *expr = nullptr; + WrapStatus ws = WrapMysqlExpression(item, tmp_table, expr, false, false); + if (ws == WrapStatus::FAILURE) + return QueryRouteTo::kToMySQL; + 
DEBUG_ASSERT(!expr->IsDeterministic()); + int col_num = AddColumnForMysqlExpression(expr, tmp_table, nullptr, common::ColOperation::LISTING, false, true); + vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1)); + if (vc.first == common::NULL_VALUE_32) { + vc.first = tmp_table.n; + cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, AttrID(col_num)); + phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, -col_num - 1), vc)); + } + cq->Add_Order(tmp_table, AttrID(vc.second), (order_by->direction != ORDER::ORDER_ASC)); + continue; + } + if (group_by_clause) { + if (item->type() == Item::FUNC_ITEM) { + MysqlExpression *expr = nullptr; + bool delayed = false; + if (HasAggregation(item)) { + delayed = true; + } + + WrapStatus ws = WrapMysqlExpression(item, tmp_table, expr, false, delayed); + if (ws == WrapStatus::FAILURE) + return QueryRouteTo::kToMySQL; + DEBUG_ASSERT(expr->IsDeterministic()); + int col_num = AddColumnForMysqlExpression( + expr, tmp_table, nullptr, delayed ? 
common::ColOperation::DELAYED : common::ColOperation::LISTING, false, + true); + vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1)); + if (vc.first == common::NULL_VALUE_32) { + vc.first = tmp_table.n; + cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, AttrID(col_num)); + phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, -col_num - 1), vc)); + } + cq->Add_Order(tmp_table, AttrID(vc.second), (order_by->direction != ORDER::ORDER_ASC)); + continue; + // we can reuse transformation done in case of HAVING + // result = Item2CQTerm(item, my_term, tmp_table, CondType::HAVING_COND); + } else { + AttrID at; + result = Item2CQTerm(item, my_term, tmp_table, CondType::HAVING_COND, false, nullptr, nullptr, base_table); + if (item->type() == Item::SUBSELECT_ITEM) { + // create a materialized column with subsel results for the ordering + cq->AddColumn(at, tmp_table, my_term, common::ColOperation::DELAYED, nullptr, false); + vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, at); + if (vc.first == common::NULL_VALUE_32) { + vc.first = tmp_table.n; + cq->CreateVirtualColumn(vc.second, tmp_table, tmp_table, at); + phys2virt.insert(std::make_pair(std::pair(tmp_table.n, at.n), vc)); + } + } else // a naked column + vc.second = my_term.vc_id; + // cq->Add_Order(tmp_table, AttrID(vc.second), !(order_by->asc)); + } + + } else { + result = Item2CQTerm(item, my_term, tmp_table, CondType::WHERE_COND); + vc.second = my_term.vc_id; + } + if (result != QueryRouteTo::kToTianmu) + return QueryRouteTo::kToMySQL; + cq->Add_Order(tmp_table, AttrID(vc.second), order_by->direction != ORDER::ORDER_ASC); + } + return QueryRouteTo::kToTianmu; +} + +QueryRouteTo Query::AddGlobalOrderByFields(SQL_I_List *global_order, const TabID &tmp_table, int max_col) { + if (!global_order) + return QueryRouteTo::kToTianmu; + + ORDER *order_by; + for (uint i = 0; i < global_order->elements; i++) { + order_by = (i == 0 ? 
(ORDER *)(global_order->first) : order_by->next); + // the way to traverse 'global_order' list maybe is not very orthodox, but + // it works + + if (order_by == nullptr) + return QueryRouteTo::kToMySQL; + + int col_num = common::NULL_VALUE_32; + if ((*(order_by->item))->type() == Item::INT_ITEM) { + col_num = int((*(order_by->item))->val_int()); + if (col_num < 1 || col_num > max_col) + return QueryRouteTo::kToMySQL; + col_num--; + col_num = -col_num - 1; // make it negative as are columns in TempTable + } else { + Item *item = *(order_by->item); + if (!item->item_name.ptr()) + return QueryRouteTo::kToMySQL; + bool found = false; + for (auto &it : field_alias2num) { + if (tmp_table.n == it.first.first && strcasecmp(it.first.second.c_str(), item->item_name.ptr()) == 0) { + col_num = it.second; + found = true; + break; + } + } + if (!found) + return QueryRouteTo::kToMySQL; + } + int attr; + cq->CreateVirtualColumn(attr, tmp_table, tmp_table, AttrID(col_num)); + phys2virt.insert(std::make_pair(std::make_pair(tmp_table.n, col_num), std::make_pair(tmp_table.n, attr))); + cq->Add_Order(tmp_table, AttrID(attr), order_by->direction != ORDER::ORDER_ASC); + } + + return QueryRouteTo::kToTianmu; +} + +Query::WrapStatus Query::WrapMysqlExpression(Item *item, const TabID &tmp_table, MysqlExpression *&expr, bool in_where, + bool aggr_used) { + // Check if the expression doesn't contain any strange items that we don't + // want to see. By the way, collect references to all Item_field objects. 
+ std::set ifields; + MysqlExpression::Item2VarID item2varid; + if (!MysqlExpression::SanityAggregationCheck(item, ifields)) + return WrapStatus::FAILURE; + + // this large "if" can be removed to use common code, but many small "ifs" + // must be created then + if (in_where) { + // create a map: [Item_field pointer] -> VarID + for (auto &it : ifields) { + if (IsAggregationItem(it)) { + // a few checkings for aggregations + Item_sum *aggregation = (Item_sum *)it; + if (aggregation->get_arg_count() > 1) + return WrapStatus::FAILURE; + if (IsCountStar(aggregation)) // count(*) doesn't need any virtual column + return WrapStatus::FAILURE; + } + AttrID col, at; + TabID tab; + // find [tab] and [col] which identify column in TIANMU + if (!FieldUnmysterify(it, tab, col)) + return WrapStatus::FAILURE; + if (!cq->ExistsInTempTable(tab, tmp_table)) { + bool is_group_by; + TabID params_table = cq->FindSourceOfParameter(tab, tmp_table, is_group_by); + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) + return WrapStatus::FAILURE; + if (is_group_by && !IsParameterFromWhere(params_table)) { + col.n = AddColumnForPhysColumn(it, params_table, TabID(), oper, distinct, true); + item2varid[it] = VarID(params_table.n, col.n); + } else + item2varid[it] = VarID(tab.n, col.n); + } else { + // aggregation in WHERE not possible unless it is a parameter + DEBUG_ASSERT(!IsAggregationItem(it)); + item2varid[it] = VarID(tab.n, col.n); + } + } + } else { // !in_where + WrapStatus ws; + AttrID at, vc; + for (auto &it : ifields) { + if (IsAggregationItem(it)) { + Item_sum *aggregation = (Item_sum *)it; + if (aggregation->get_arg_count() > 1) + return WrapStatus::FAILURE; + + if (IsCountStar(aggregation)) { // count(*) doesn't need any virtual column + at.n = GetAddColumnId(AttrID(common::NULL_VALUE_32), tmp_table, common::ColOperation::COUNT, false); + if (at.n == common::NULL_VALUE_32) // doesn't exist yet + 
cq->AddColumn(at, tmp_table, CQTerm(), common::ColOperation::COUNT, nullptr, false); + } else { + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(aggregation, oper, distinct, true)) + return WrapStatus::FAILURE; + AttrID col; + TabID tab; + if (IsFieldItem(aggregation->get_arg(0)) && FieldUnmysterify(aggregation, tab, col) && + cq->ExistsInTempTable(tab, tmp_table)) { + // PHYSICAL COLUMN + at.n = AddColumnForPhysColumn(aggregation->get_arg(0), tmp_table, TabID(), oper, distinct, true); + } else { + // EXPRESSION + ws = WrapMysqlExpression(aggregation->get_arg(0), tmp_table, expr, in_where, false); + if (ws == WrapStatus::FAILURE) + return ws; + at.n = AddColumnForMysqlExpression(expr, tmp_table, aggregation->item_name.ptr(), oper, distinct, true); + } + } + item2varid[it] = VarID(tmp_table.n, at.n); + } else if (IsFieldItem(it)) { + AttrID col; + TabID tab; + if (!FieldUnmysterify(it, tab, col)) + return WrapStatus::FAILURE; + if (!cq->ExistsInTempTable(tab, tmp_table)) { + bool is_group_by; + TabID params_table = cq->FindSourceOfParameter(tab, tmp_table, is_group_by); + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) + return WrapStatus::FAILURE; + if (is_group_by && !IsParameterFromWhere(params_table)) { + col.n = AddColumnForPhysColumn(it, params_table, TabID(), oper, distinct, true); + item2varid[it] = VarID(params_table.n, col.n); + } else + item2varid[it] = VarID(tab.n, col.n); + } else if (aggr_used) { + common::ColOperation oper; + bool distinct; + if (QueryRouteTo::kToMySQL == OperationUnmysterify(it, oper, distinct, true)) + return WrapStatus::FAILURE; + at.n = AddColumnForPhysColumn(it, tmp_table, TabID(), oper, distinct, true); + item2varid[it] = VarID(tmp_table.n, at.n); + } else { + item2varid[it] = VarID(tab.n, col.n); + } + } else + DEBUG_ASSERT(0); // unknown item type? 
+ } + } + gc_expressions.push_back(expr = new MysqlExpression(item, item2varid)); + return WrapStatus::SUCCESS; +} + +int Query::AddColumnForPhysColumn(Item *item, const TabID &tmp_table, TabID const &base_table, + const common::ColOperation oper, const bool distinct, bool group_by, + const char *alias) { + std::pair vc; + AttrID col, at; + TabID tab; + if (!FieldUnmysterify(item, tab, col)) + return common::NULL_VALUE_32; + if (tab.n == common::NULL_VALUE_32) + tab = tmp_table; // table name not contained in item - must be the result + // temp_table + + if (base_table.IsNullID()) { + DEBUG_ASSERT(cq->ExistsInTempTable(tab, tmp_table)); + if (item->type() == Item_tianmufield::get_tianmuitem_type() && + IsAggregationItem(dynamic_cast(item)->OriginalItem())) { + return ((Item_tianmufield *)item)->varID[0].col; + } + vc = VirtualColumnAlreadyExists(tmp_table, tab, col); + if (vc.first == common::NULL_VALUE_32) { + vc.first = tmp_table.n; + cq->CreateVirtualColumn(vc.second, tmp_table, tab, col); + phys2virt.insert(std::make_pair(std::make_pair(tab.n, col.n), vc)); + } else { + int attr = GetAddColumnId(AttrID(vc.second), tmp_table, oper, distinct); + if (attr != common::NULL_VALUE_32) { + if (group_by) // do not add column - not needed duplicate + return attr; + // vc.n = col_to_vc[attr]; + } else if (group_by && oper == common::ColOperation::GROUP_BY && + (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::LISTING, distinct)) != + common::NULL_VALUE_32) { + // modify existing column + CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); + return attr; + } else if (group_by && oper == common::ColOperation::LISTING && + (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::GROUP_BY, distinct)) != + common::NULL_VALUE_32) { + // don;t add unnecessary column to select list + return attr; + } + } + } else { + std::pair phys_vc = VirtualColumnAlreadyExists(base_table, tab, col); + if (phys_vc.first == common::NULL_VALUE_32) { + 
return common::NULL_VALUE_32; + } + vc = VirtualColumnAlreadyExists(tmp_table, TabID(phys_vc.first), AttrID(phys_vc.first)); + if (vc.first == common::NULL_VALUE_32) { + vc.first = tmp_table.n; + int at_id = field_alias2num[TabIDColAlias(base_table.n, item->item_name.ptr())]; + cq->CreateVirtualColumn(vc.second, tmp_table, TabID(phys_vc.first), AttrID(at_id)); + phys2virt.insert(std::make_pair(std::make_pair(phys_vc.first, at_id), vc)); + } else { + int attr = GetAddColumnId(AttrID(vc.second), tmp_table, oper, distinct); + if (attr != common::NULL_VALUE_32) { + if (group_by) // do not add column - not needed duplicate + return attr; + } else if (group_by && oper == common::ColOperation::GROUP_BY && + (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::LISTING, distinct)) != + common::NULL_VALUE_32) { + CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); + return attr; + } else if (group_by && oper == common::ColOperation::LISTING && + (attr = GetAddColumnId(AttrID(vc.second), tmp_table, common::ColOperation::GROUP_BY, distinct)) != + common::NULL_VALUE_32) { + return attr; + } + } + } + + if (!item->item_name.ptr() && item->type() == Item::SUM_FUNC_ITEM) { + cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, + group_by ? nullptr : ((Item_field *)(((Item_sum *)item)->get_arg(0)))->item_name.ptr(), distinct); + } else { + if (item->type() == Item::SUM_FUNC_ITEM && ((Item_sum *)item)->sum_func() == Item_sum::GROUP_CONCAT_FUNC) { + // pass the seprator to construct the special instruction + char *ptr = ((Item_func_group_concat *)item)->get_separator()->c_ptr(); + SpecialInstruction si; + si.separator.assign(ptr, std::strlen(ptr)); + si.order = ((Item_func_group_concat *)item)->direction(); + cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, group_by ? nullptr : item->item_name.ptr(), distinct, &si); + } else { + cq->AddColumn(at, tmp_table, CQTerm(vc.second), oper, group_by ? 
nullptr : item->item_name.ptr(), distinct); + } + } + if (!group_by && item->item_name.ptr()) + field_alias2num[TabIDColAlias(tmp_table.n, alias ? alias : item->item_name.ptr())] = at.n; + return at.n; +} + +int Query::AddColumnForMysqlExpression(MysqlExpression *mysql_expression, const TabID &tmp_table, const char *alias, + const common::ColOperation oper, const bool distinct, + bool group_by /*= false*/) { + AttrID at, vc; + vc.n = VirtualColumnAlreadyExists(tmp_table, mysql_expression); + if (vc.n == common::NULL_VALUE_32) { + cq->CreateVirtualColumn(vc, tmp_table, mysql_expression, + (oper == common::ColOperation::DELAYED ? tmp_table : TabID(common::NULL_VALUE_32))); + tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, mysql_expression))); + } else { + mysql_expression->RemoveUnusedVarID(); + int attr = GetAddColumnId(vc, tmp_table, oper, distinct); + if (attr != common::NULL_VALUE_32) { + if (group_by) // do not add column - not needed duplicate + return attr; + // vc.n = col_to_vc[attr]; + } else if (group_by && oper == common::ColOperation::GROUP_BY && + (attr = GetAddColumnId(vc, tmp_table, common::ColOperation::LISTING, distinct)) != + common::NULL_VALUE_32) { + // modify existing column + CQChangeAddColumnLIST2GROUP_BY(tmp_table, attr); + return attr; + } else if (group_by && oper == common::ColOperation::LISTING && + (attr = GetAddColumnId(vc, tmp_table, common::ColOperation::GROUP_BY, distinct)) != + common::NULL_VALUE_32) { + // don;t add unnecessary column to select list + return attr; + } + } + + // if (parametrized) + // cq->AddColumn(at, tmp_table, CQTerm(vc.n), DELAYED, group_by ? nullptr : + // alias, distinct); + // else + cq->AddColumn(at, tmp_table, CQTerm(vc.n), oper, group_by ? 
nullptr : alias, distinct); + if (!group_by && alias) + field_alias2num[TabIDColAlias(tmp_table.n, alias)] = at.n; + return at.n; +} + +bool Query::IsLocalColumn(Item *item, const TabID &tmp_table) { + DEBUG_ASSERT(IsFieldItem(item) || IsAggregationItem(item)); + AttrID col; + TabID tab; + if (!FieldUnmysterify(item, tab, col)) + return false; + return cq->ExistsInTempTable(tab, tmp_table); +} + +QueryRouteTo Query::Compile(CompiledQuery *compiled_query, SELECT_LEX *selects_list, SELECT_LEX *last_distinct, + TabID *res_tab, bool ignore_limit, Item *left_expr_for_subselect, + common::Operator *oper_for_subselect, bool ignore_minmax, bool for_subq_in_where) { + MEASURE_FET("Query::Compile(...)"); + // at this point all tables are in RCBase engine, so we can proceed with the + // query + + /*Item_func + | + --Item_int_func <- arguments are kept in an array accessible through arguments() + | + --Item_bool_func + | | + | ---Item_cond <- arguments are kept in a list accessible through argument_list() + | | | + | | ---Item_cond_and <- when negated OR of negated items is created + | | | + | | ---Item_cond_or <- when negated AND of negated items is created + | | | + | | ---Item_cond_xor + | | + | ---Item_equal <- arguments are kept in a list accessible through argument_list() + | | + const_item (accessible through get_const() ) + | | (multiple equality) + | | + | ---Item_func_not + | | (???) + | | + | ---Item func_isnull <- when negated IS NOT NULL is created + | + --Item_func_opt_neg <- arguments are kept in an array accessible through arguments(), if negated + | | this information is kept additionally (in a field named 'negated') + | | + | | + | ---Item_func_in + | | + | | + | ---Item_func_between + | + | + --Item_bool_func2 + | + | + ---Item_bool_rowready_func2 <-arguments are kept in an array accessible through arguments(), if negated + | an object of a corresponding class is created + | (e.q. 
~Item_func_lt => Item_func_ge) + | + ----Item_func_eq + | + | + ----Item_func_ne + | + | + ----Item_func_ge + | + | + ----Item_func_le + | + | + ----Item_func_gt + | + | + ----Item_func_lt + | + | + ----Item_func_equal <- This is mystery so far + There are 3 equality functions: + Item_equal -> multiple equality (many fields and optional additional constant value) + Item_func_equal -> ??? + Item_func_eq -> pairwise equality + */ + + bool union_all = (last_distinct == nullptr); + TabID prev_result; + + SQL_I_List *global_order = nullptr; + int col_count = 0; + int64_t global_limit_value = -1; + int64_t global_offset_value = -1; + + // local copy of current cq, to be restored on exit + CompiledQuery *saved_cq = cq; + cq = compiled_query; + + if ((selects_list->join) && + (selects_list != selects_list->join->unit->global_parameters())) { // only in case of unions this is set + SetLimit(selects_list->join->unit->global_parameters(), 0, global_offset_value, (int64_t &)global_limit_value); + global_order = &(selects_list->join->unit->global_parameters()->order_list); + } + + for (SELECT_LEX *sl = selects_list; sl; sl = sl->next_select()) { + int64_t limit_value = -1; + int64_t offset_value = -1; + /* + Increase the identification of whether to create a JOIN object, + which is used to release the JOIN object later. See #669 for the problems solved. + */ + bool ifNewJoinForTianmu = false; + if (!sl->join) { + sl->add_active_options(SELECT_NO_UNLOCK); + JOIN *join = new JOIN(sl->master_unit()->thd, sl); + + if (!join) { + sl->cleanup(0); + return QueryRouteTo::kToTianmu; + } + ifNewJoinForTianmu = true; + sl->set_join(join); + } + + if (QueryRouteTo::kToMySQL == JudgeErrors(sl)) + return QueryRouteTo::kToMySQL; + + SetLimit(sl, sl == selects_list ? 0 : sl->join->unit->global_parameters(), offset_value, limit_value); + List *fields = &sl->fields_list; + + Item *conds = (ifNewJoinForTianmu || !sl->join->where_cond) ? 
sl->where_cond() : sl->join->where_cond; + + ORDER *order = sl->order_list.first; + + // if (order) global_order = 0; //we want to zero global order (which + // seems to be always present) if we find a local order by clause + // The above is not necessary since global_order is set only in case of real UNIONs + + ORDER *group = sl->group_list.first; + Item *having = sl->having_cond(); + List *join_list = sl->join_list; + bool zero_result = sl->join->zero_result_cause != nullptr; + + // The exists subquery determines whether a value exists during the query optimization phase + // result is not set to zero only when a matching value is found in the query optimization phase + // When a field has an index, the optimization phase scans the table through the index + // The primary key implementation of the current column storage engine has a problem with the primary key + // index to scan the table for data Remove the following temporary practices after primary key indexing is complete + if (zero_result) { + if (Item::Type::SUBSELECT_ITEM == (conds->type())) { + zero_result = false; + } else { + Item_cond *item_cond = dynamic_cast(conds); + if (item_cond) { + List_iterator_fast li(*item_cond->argument_list()); + Item *item; + while ((item = li++)) { + if (item && Item::Type::SUBSELECT_ITEM == (item->type())) { + zero_result = false; + break; + } + } + } + } + } + + // When join_list has no elements and field has sp, tmp table is used and de-duplicated + // Use use_tmp_when_no_join when AddJoins + bool use_tmp_when_no_join = false; + if (!join_list->elements) { + List_iterator_fast li(*fields); + for (Item *item = li++; item; item = li++) { + if ((item->type() == Item::Type::FUNC_ITEM) && + ((down_cast(item)->functype() == Item_func::Functype::FUNC_SP) || + (down_cast(item)->functype() == Item_func::Functype::SUSERVAR_FUNC)) && + (!sl->is_distinct())) { + sl->add_active_options(SELECT_DISTINCT); + sl->join->select_distinct = TRUE; + use_tmp_when_no_join = true; + break; + 
} // else if (item->type() == Item::Type::) + } + } + + // partial optimization of LOJ conditions, JOIN::optimize(part=3) necessary due to already done basic transformation + // of conditions see comments in sql_select.cc:JOIN::optimize() + if (IsLOJ(join_list) && + ((!sl->join->where_cond) || (sl->join->where_cond && (uint64_t)sl->join->where_cond != 0x01))) { + sl->join->optimize(OptimizePhase::Finish_LOJ_Transform); + } + + Item *field_for_subselect; + Item *cond_to_reinsert = nullptr; + List *list_to_reinsert = nullptr; + + TabID tmp_table; + try { + if (left_expr_for_subselect) + if (!ClearSubselectTransformation(*oper_for_subselect, field_for_subselect, conds, having, cond_to_reinsert, + list_to_reinsert, left_expr_for_subselect)) + throw CompilationError(); + + if (having && !group) // we cannot handle the case of a having without a group by + throw CompilationError(); + + // handle table list + TABLE_LIST *tables = sl->leaf_tables ? sl->leaf_tables : (TABLE_LIST *)sl->table_list.first; + for (TABLE_LIST *table_ptr = tables; table_ptr; table_ptr = table_ptr->next_leaf) { + if (!table_ptr->is_view_or_derived()) { + if (!Engine::IsTianmuTable(table_ptr->table)) + throw CompilationError(); + + std::string path = TablePath(table_ptr); + if (path2num.find(path) == path2num.end()) { + path2num[path] = NumOfTabs(); + AddTable(m_conn->GetTableByPath(path)); + TIANMU_LOG(LogCtl_Level::DEBUG, "add query table: %s", path.c_str()); + } + } + } + + if (!sl->leaf_table_count && !use_tmp_when_no_join) { // process select xxx or select xxx from dual. + TabID tab(-NumOfTabs() - 1); + cq->TmpTable(tmp_table, tab, TableSubType::DUAL, false); + use_tmp_when_no_join = true; + } else { // handle join & join cond, which has table(s). 
+ std::vector left_tables, right_tables; + bool first_table = true; + if (QueryRouteTo::kToMySQL == AddJoins(*join_list, tmp_table, left_tables, right_tables, + (res_tab != nullptr && res_tab->n != 0), first_table, for_subq_in_where, + use_tmp_when_no_join)) + throw CompilationError(); + } + + // handle fields + List field_list_for_subselect; + if (left_expr_for_subselect && field_for_subselect) { + field_list_for_subselect.push_back(field_for_subselect); + fields = &field_list_for_subselect; + } + bool aggr_used = false; + if (sl->has_sj_nests && group != nullptr) { + // handle semi-join fields (use on group by) + if (QueryRouteTo::kToMySQL == AddSemiJoinFiled(*fields, *join_list, tmp_table)) + throw CompilationError(); + } else { + // handle normal fields + if (QueryRouteTo::kToMySQL == + AddFields(*fields, tmp_table, TabID(), group != nullptr, col_count, ignore_minmax, aggr_used)) + throw CompilationError(); + if (QueryRouteTo::kToMySQL == AddGroupByFields(group, tmp_table, TabID())) + throw CompilationError(); + bool group_by_clause = group != nullptr || sl->join->select_distinct || aggr_used || sl->has_sj_nests; + if (QueryRouteTo::kToMySQL == AddOrderByFields(order, tmp_table, TabID(), group_by_clause)) + throw CompilationError(); + } + + // handle where cond + CondID cond_id; + (BuildConditions(conds, cond_id, cq, tmp_table, CondType::WHERE_COND, zero_result) == QueryRouteTo::kToMySQL) + ? throw CompilationError() + : cq->AddConds(tmp_table, cond_id, CondType::WHERE_COND); + + // handle having cond + cond_id = CondID(); + (BuildConditions(having, cond_id, cq, tmp_table, CondType::HAVING_COND) == QueryRouteTo::kToMySQL) + ? throw CompilationError() + : cq->AddConds(tmp_table, cond_id, CondType::HAVING_COND); + + // apply the condition to tmp_table. 
+ cq->ApplyConds(tmp_table); + + // handle group by & order by after semi-join + if (sl->has_sj_nests) { + if (group != nullptr) { + cq->Mode(tmp_table, TMParameter::TM_DISTINCT); + TabID new_tmp_table; + cq->TmpTable(new_tmp_table, tmp_table, TableSubType::NORMAL, false); + + // process all fields. + (AddFields(*fields, new_tmp_table, tmp_table, group != nullptr, col_count, ignore_minmax, aggr_used) == + QueryRouteTo::kToMySQL) + ? throw CompilationError() + : TIANMU_LOG(LogCtl_Level::DEBUG, "AddFields process success, temp_table: %d.", tmp_table.n); + // process group by clause. + (AddGroupByFields(group, new_tmp_table, tmp_table) == QueryRouteTo::kToMySQL) + ? throw CompilationError() + : TIANMU_LOG(LogCtl_Level::DEBUG, "AddGroupByFields process success, temp_table: %d", tmp_table.n); + // process order by clause. + (AddOrderByFields(order, new_tmp_table, tmp_table, + group != nullptr || sl->join->select_distinct || aggr_used) == QueryRouteTo::kToMySQL) + ? throw CompilationError() + : TIANMU_LOG(LogCtl_Level::DEBUG, "AddGroupByFields process success, tmp_table: %d", tmp_table.n); + tmp_table = new_tmp_table; + } else { + cq->Mode(tmp_table, TMParameter::TM_DISTINCT); + } + } + } catch (...) { + // restore original values of class fields (necessary if this method is called recursively) + cq = saved_cq; + if (cond_to_reinsert && list_to_reinsert) + list_to_reinsert->push_back(cond_to_reinsert); + if (ifNewJoinForTianmu) + sl->cleanup(true); + return QueryRouteTo::kToMySQL; + } + + if (sl->join->select_distinct) + cq->Mode(tmp_table, TMParameter::TM_DISTINCT); + if (!ignore_limit && limit_value >= 0 && !sl->has_sj_nests) + cq->Mode(tmp_table, TMParameter::TM_TOP, offset_value, limit_value); + + if (sl == selects_list) { + prev_result = tmp_table; + if (global_order && !selects_list->next_select()) { // trivial union with one select and ext. 
order by + tmp_table = TabID(); + cq->Union(prev_result, prev_result, tmp_table, true); + } + } else + cq->Union(prev_result, prev_result, tmp_table, union_all); + + if (sl == last_distinct) + union_all = true; + + if (cond_to_reinsert && list_to_reinsert) + list_to_reinsert->push_back(cond_to_reinsert); + + if (ifNewJoinForTianmu) + sl->cleanup(true); + } + + cq->BuildTableIDStepsMap(); + + if (QueryRouteTo::kToMySQL == AddGlobalOrderByFields(global_order, prev_result, col_count)) + return QueryRouteTo::kToMySQL; + + if (!ignore_limit && global_limit_value >= 0) + cq->Mode(prev_result, TMParameter::TM_TOP, global_offset_value, global_limit_value); + + if (res_tab != nullptr) + *res_tab = prev_result; + else + cq->Result(prev_result); + cq = saved_cq; + return QueryRouteTo::kToTianmu; +} + +JoinType Query::GetJoinTypeAndCheckExpr(uint outer_join, Item *on_expr) { + if (outer_join) + ASSERT(on_expr != 0, "on_expr shouldn't be null when outer_join != 0"); + + JoinType join_type; + + if ((outer_join & JOIN_TYPE_LEFT) && (outer_join & JOIN_TYPE_RIGHT)) + join_type = JoinType::JO_FULL; + else if (outer_join & JOIN_TYPE_LEFT) + join_type = JoinType::JO_LEFT; + else if (outer_join & JOIN_TYPE_RIGHT) + join_type = JoinType::JO_RIGHT; + else + join_type = JoinType::JO_INNER; + + return join_type; +} + +bool Query::IsLOJ(List *join) { + TABLE_LIST *join_ptr{nullptr}; + List_iterator li(*join); + while ((join_ptr = li++)) { + JoinType join_type = GetJoinTypeAndCheckExpr(join_ptr->outer_join, join_ptr->join_cond()); + if (join_ptr->join_cond() && (join_type == JoinType::JO_LEFT || join_type == JoinType::JO_RIGHT)) + return true; + } + return false; +} + +} // namespace core +} // namespace Tianmu diff --git a/storage/tianmu/core/temp_table.cpp b/storage/tianmu/core/temp_table.cpp index 6195067c4..116efb6f4 100644 --- a/storage/tianmu/core/temp_table.cpp +++ b/storage/tianmu/core/temp_table.cpp @@ -937,7 +937,7 @@ void TempTable::Attr::SetNewPageSize(uint new_page_size) { } 
TempTable::TempTable(const TempTable &t, bool is_vc_owner) - : filter(t.filter), output_mind(t.output_mind), is_vc_owner(is_vc_owner), m_conn(t.m_conn) { + : filter(t.filter), output_mind(t.output_mind), is_vc_owner(is_vc_owner), m_conn(t.m_conn), sub_type(t.sub_type) { no_obj = t.no_obj; materialized = t.materialized; aliases = t.aliases; @@ -955,9 +955,11 @@ TempTable::TempTable(const TempTable &t, bool is_vc_owner) force_full_materialize = t.force_full_materialize; no_materialized = t.no_materialized; no_global_virt_cols = int(t.virt_cols.size()); + for (uint i = 0; i < t.attrs.size(); i++) { attrs.push_back(new Attr(*t.attrs[i])); } + is_sent = t.is_sent; mem_scale = t.mem_scale; rough_is_empty = t.rough_is_empty; @@ -982,9 +984,10 @@ TempTable::~TempTable() { } void TempTable::TranslateBackVCs() { - for (int i = 0; i < no_global_virt_cols; i++) + for (int i = 0; i < no_global_virt_cols; i++) { if (virt_cols[i] && static_cast(virt_cols[i]->IsSingleColumn())) static_cast(virt_cols[i])->TranslateSourceColumns(attr_back_translation); + } } std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_order, @@ -1001,14 +1004,17 @@ std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_orde copy_buf.push_back(attrs[i]->buffer); attrs[i]->buffer = nullptr; } + if (no_global_virt_cols != -1) { // this is a TempTable copy for (uint i = no_global_virt_cols; i < virt_cols.size(); i++) { delete virt_cols[i]; virt_cols[i] = nullptr; } + virt_cols.resize(no_global_virt_cols); } + std::shared_ptr working_copy = Create(*this, in_subq); // Original VCs of this will be copied to // working_copy, and then deleted in its // destructor @@ -1030,6 +1036,7 @@ std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_orde filter.mind_->AddDimension_cross(no_obj); if (virt_cols.size() < attrs.size()) virt_cols.resize(attrs.size()); + fill(virt_cols.begin(), virt_cols.end(), (vcolumn::VirtualColumn *)nullptr); for (uint i = 0; i < attrs.size(); i++) { 
vcolumn::VirtualColumn *new_vc = @@ -1038,9 +1045,9 @@ std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_orde attrs[i]->term.vc = new_vc; attrs[i]->dim = 0; } + if (translate_order) { - order_by.clear(); // translation needed: virt_cols should point to - // working_copy as a data source + order_by.clear(); // translation needed: virt_cols should point to working_copy as a data source for (uint i = 0; i < working_copy->virt_cols.size(); i++) { vcolumn::VirtualColumn *orig_vc = working_copy->virt_cols[i]; // if(in_subq && orig_vc->IsSingleColumn()) @@ -1065,6 +1072,7 @@ std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_orde } } } + for (uint i = 0; i < working_copy->order_by.size(); i++) { SortDescriptor sord; sord.vc = working_copy->order_by[i].vc; @@ -1077,21 +1085,19 @@ std::shared_ptr TempTable::CreateMaterializedCopy(bool translate_orde // find which working_copy->vc is used in ordering for (uint j = 0; j < working_copy->virt_cols.size(); j++) { if (/*working_copy->*/ order_by[i].vc == working_copy->virt_cols[j]) { - // order_by[i].vc = working_copy->virt_cols[j]; - MoveVC(j, working_copy->virt_cols, - virt_cols); // moving vc - now it is back in this + MoveVC(j, working_copy->virt_cols, virt_cols); // moving vc - now it is back in this break; } } } } + no_global_virt_cols = (int)virt_cols.size(); return working_copy; // must be deleted by DeleteMaterializedCopy() } void TempTable::DeleteMaterializedCopy(std::shared_ptr &old_t) // delete the external table and remove - // VC pointers, make this fully - // materialized + // VC pointers, make this fully materialized { MEASURE_FET("TempTable::DeleteMaterializedCopy(...)"); for (uint i = 0; i < attrs.size(); i++) { // Make sure VCs are deleted before the source table is deleted @@ -1099,6 +1105,7 @@ void TempTable::DeleteMaterializedCopy(std::shared_ptr &old_t) // de delete virt_cols[i]; virt_cols[i] = nullptr; } + old_t.reset(); } @@ -1108,6 +1115,7 @@ void TempTable::MoveVC(int 
colnum, std::vector &from, to.push_back(vc); from[colnum] = nullptr; std::vector vv = vc->GetChildren(); + for (size_t i = 0; i < vv.size(); i++) MoveVC(vv[i], from, to); } @@ -1316,25 +1324,31 @@ void TempTable::Union(TempTable *t, int all) { DEBUG_ASSERT(NumOfDisplaybleAttrs() == t->NumOfDisplaybleAttrs()); if (NumOfDisplaybleAttrs() != t->NumOfDisplaybleAttrs()) throw common::NotImplementedException("UNION of tables with different number of columns."); + if (this->IsParametrized() || t->IsParametrized()) throw common::NotImplementedException("Materialize: not implemented union of parameterized queries."); + tianmu_control_.lock(m_conn->GetThreadID()) << "UNION: materializing components." << system::unlock; this->Materialize(); t->Materialize(); + if ((!t->NumOfObj() && all) || (!this->NumOfObj() && !t->NumOfObj())) // no objects = no union return; - Filter first_f(NumOfObj(), p_power), first_mask(NumOfObj(), - p_power); // mask of objects to be added to the final result set + // mask of objects to be added to the final result set + Filter first_f(NumOfObj(), p_power), first_mask(NumOfObj(), p_power); Filter sec_f(t->NumOfObj(), p_power), sec_mask(t->NumOfObj(), p_power); first_mask.Set(); sec_mask.Set(); + if (!all) { tianmu_control_.lock(m_conn->GetThreadID()) << "UNION: excluding repetitions." 
<< system::unlock; Filter first_f(NumOfObj(), p_power); first_f.Set(); + Filter sec_f(t->NumOfObj(), p_power); sec_f.Set(); + GroupDistinctTable dist_table(p_power); using vc_ptr_t = std::shared_ptr; std::vector first_vcs; @@ -1349,19 +1363,23 @@ void TempTable::Union(TempTable *t, int all) { vc_ptr_t(new vcolumn::SingleColumn(GetDisplayableAttrP(i), &output_mind, 0, -i - 1, this, 0))); sec_vcs.push_back( vc_ptr_t(new vcolumn::SingleColumn(t->GetDisplayableAttrP(i), t->GetOutputMultiIndexP(), 1, -i - 1, t, 0))); + encoder.push_back(ColumnBinEncoder()); - bool encoder_created; - if (NumOfObj() == 0) - encoder_created = encoder[i].PrepareEncoder(sec_vcs[i].get()); - else if (t->NumOfObj() == 0) - encoder_created = encoder[i].PrepareEncoder(first_vcs[i].get()); - else + + bool encoder_created{false}; + if (NumOfObj() && t->NumOfObj()) encoder_created = encoder[i].PrepareEncoder(first_vcs[i].get(), sec_vcs[i].get()); + else { + encoder_created = NumOfObj() ? encoder[i].PrepareEncoder(first_vcs[i].get()) + : ((t->NumOfObj()) ? encoder[i].PrepareEncoder(sec_vcs[i].get()) : false); + } + if (!encoder_created) { std::stringstream ss; ss << "UNION of non-matching columns (column no " << i << ") ."; throw common::NotImplementedException(ss.str()); } + encoder[i].SetPrimaryOffset(size); size += encoder[i].GetPrimarySize(); } @@ -1375,53 +1393,68 @@ void TempTable::Union(TempTable *t, int all) { do { first_mit.Rewind(); dist_table.Clear(); + while (first_mit.IsValid()) { int64_t pos = **first_mit; if (first_f.Get(pos)) { - for (uint i = 0; i < encoder.size(); i++) encoder[i].Encode(input_buf, first_mit); + for (uint i = 0; i < encoder.size(); i++) { + encoder[i].Encode(input_buf, first_mit); + } + GDTResult res = dist_table.Add(input_buf); if (res == GDTResult::GDT_EXISTS) first_mask.ResetDelayed(pos); - if (res != GDTResult::GDT_FULL) // note: if v is omitted here, it will also be - // omitted in sec! 
+ if (res != GDTResult::GDT_FULL) // note: if v is omitted here, it will also be omitted in sec! first_f.ResetDelayed(pos); } + ++first_mit; if (m_conn->Killed()) throw common::KilledException(); } + sec_mit.Rewind(); while (sec_mit.IsValid()) { int64_t pos = **sec_mit; if (sec_f.Get(pos)) { - for (uint i = 0; i < encoder.size(); i++) encoder[i].Encode(input_buf, sec_mit, sec_vcs[i].get()); + for (uint i = 0; i < encoder.size(); i++) { + encoder[i].Encode(input_buf, sec_mit, sec_vcs[i].get()); + } + GDTResult res = dist_table.Add(input_buf); if (res == GDTResult::GDT_EXISTS) sec_mask.ResetDelayed(pos); if (res != GDTResult::GDT_FULL) sec_f.ResetDelayed(pos); } + ++sec_mit; if (m_conn->Killed()) throw common::KilledException(); } + first_f.Commit(); - sec_f.Commit(); first_mask.Commit(); + + sec_f.Commit(); sec_mask.Commit(); } while (!first_f.IsEmpty() || !sec_f.IsEmpty()); } delete[] input_buf; } + int64_t first_no_obj = first_mask.NumOfOnes(); int64_t sec_no_obj = sec_mask.NumOfOnes(); int64_t new_no_obj = first_no_obj + sec_no_obj; + tianmu_control_.lock(m_conn->GetThreadID()) << "UNION: generating result (" << new_no_obj << " rows)." << system::unlock; + uint new_page_size = CalculatePageSize(new_no_obj); for (uint i = 0; i < NumOfDisplaybleAttrs(); i++) { Attr *first_attr = GetDisplayableAttrP(i); Attr *sec_attr = t->GetDisplayableAttrP(i); + ColumnType new_type = GetUnionType(first_attr->Type(), sec_attr->Type()); if (first_attr->Type() == sec_attr->Type() && first_mask.IsFull() && first_no_obj && sec_no_obj && first_attr->Type().GetPrecision() >= sec_attr->Type().GetPrecision()) { @@ -1440,35 +1473,39 @@ void TempTable::Union(TempTable *t, int all) { } continue; } + + // generates the output attrs. 
Attr *new_attr = new Attr(CQTerm(), common::ColOperation::LISTING, p_power, false, first_attr->alias, 0, new_type.GetTypeName(), new_type.GetScale(), new_type.GetPrecision(), new_type.NotNull(), new_type.GetCollation()); new_attr->page_size = new_page_size; new_attr->CreateBuffer(new_no_obj, m_conn); + if (first_attr->TypeName() == common::ColumnType::NUM && sec_attr->TypeName() == common::ColumnType::NUM && first_attr->Type().GetScale() != sec_attr->Type().GetScale()) { uint max_scale = new_attr->Type().GetScale(); + // copy attr from first table to new_attr double multiplier = types::PowOfTen(max_scale - first_attr->Type().GetScale()); FilterOnesIterator first_fi(&first_mask, p_power); for (int64_t j = 0; j < first_no_obj; j++) { int64_t pos = *first_fi; ++first_fi; - if (!first_attr->IsNull(pos)) - new_attr->SetValueInt64(j, first_attr->GetNotNullValueInt64(pos) * (int64_t)multiplier); - else - new_attr->SetValueInt64(j, common::NULL_VALUE_64); + + int64_t val = (!first_attr->IsNull(pos)) ? (first_attr->GetNotNullValueInt64(pos) * (int64_t)multiplier) + : common::NULL_VALUE_64; + new_attr->SetValueInt64(j, val); } + // copy attr from second table to new_attr multiplier = types::PowOfTen(max_scale - sec_attr->Type().GetScale()); FilterOnesIterator sec_fi(&sec_mask, p_power); for (int64_t j = 0; j < sec_no_obj; j++) { int64_t pos = *sec_fi; ++sec_fi; - if (!sec_attr->IsNull(pos)) - new_attr->SetValueInt64(first_no_obj + j, sec_attr->GetNotNullValueInt64(pos) * (int64_t)multiplier); - else - new_attr->SetValueInt64(first_no_obj + j, common::NULL_VALUE_64); + int64_t val = (!sec_attr->IsNull(pos)) ? 
(sec_attr->GetNotNullValueInt64(pos) * (int64_t)multiplier) + : common::NULL_VALUE_64; + new_attr->SetValueInt64(first_no_obj + j, val); } } else if (ATI::IsStringType(new_attr->TypeName())) { types::BString s; @@ -1478,6 +1515,7 @@ void TempTable::Union(TempTable *t, int all) { new_attr->SetValueString(j, s); ++first_fi; } + FilterOnesIterator sec_fi(&sec_mask, p_power); for (int64_t j = 0; j < sec_no_obj; j++) { sec_attr->GetValueString(s, *sec_fi); @@ -1489,6 +1527,7 @@ void TempTable::Union(TempTable *t, int all) { for (int64_t j = 0; j < first_no_obj; j++) { int64_t pos = *first_fi; ++first_fi; + if (first_attr->IsNull(pos)) new_attr->SetValueInt64(j, common::NULL_VALUE_64); else if (first_attr->Type().IsFloat()) @@ -1498,10 +1537,12 @@ void TempTable::Union(TempTable *t, int all) { new_attr->SetValueInt64(j, *(int64_t *)&v); } } + FilterOnesIterator sec_fi(&sec_mask, p_power); for (int64_t j = 0; j < sec_no_obj; j++) { int64_t pos = *sec_fi; ++sec_fi; + if (sec_attr->IsNull(pos)) new_attr->SetValueInt64(first_no_obj + j, common::NULL_VALUE_64); else if (sec_attr->Type().IsFloat()) @@ -1511,44 +1552,48 @@ void TempTable::Union(TempTable *t, int all) { new_attr->SetValueInt64(first_no_obj + j, *(int64_t *)&v); } } - } else { + } else { // other types. 
// copy attr from first table to new_attr double multiplier = types::PowOfTen(new_attr->Type().GetScale() - first_attr->Type().GetScale()); FilterOnesIterator first_fi(&first_mask, p_power); for (int64_t j = 0; j < first_no_obj; j++) { int64_t pos = *first_fi; ++first_fi; + if (first_attr->IsNull(pos)) new_attr->SetValueInt64(j, common::NULL_VALUE_64); - else if (multiplier == 1.0) // do not multiply by 1.0, as it causes - // precision problems on bigint + else if (multiplier == 1.0) // do not multiply by 1.0, as it causes precision problems on bigint new_attr->SetValueInt64(j, first_attr->GetNotNullValueInt64(pos)); else new_attr->SetValueInt64(j, (int64_t)(first_attr->GetNotNullValueInt64(pos) * multiplier)); } + multiplier = types::PowOfTen(new_attr->Type().GetScale() - sec_attr->Type().GetScale()); FilterOnesIterator sec_fi(&sec_mask, p_power); for (int64_t j = 0; j < sec_no_obj; j++) { int64_t pos = *sec_fi; ++sec_fi; + if (sec_attr->IsNull(pos)) new_attr->SetValueInt64(first_no_obj + j, common::NULL_VALUE_64); - else if (multiplier == 1.0) // do not multiply by 1.0, as it causes - // precision problems on bigint + else if (multiplier == 1.0) // do not multiply by 1.0, as it causes precision problems on bigint new_attr->SetValueInt64(first_no_obj + j, sec_attr->GetNotNullValueInt64(pos)); else new_attr->SetValueInt64(first_no_obj + j, (int64_t)(sec_attr->GetNotNullValueInt64(pos) * multiplier)); } } + attrs[GetDisplayableAttrIndex(i)] = new_attr; displayable_attr[i] = new_attr; delete first_attr; } + SetNumOfMaterialized(new_no_obj); // this->no_obj = new_no_obj; // this->Display(); output_mind.Clear(); output_mind.AddDimension_cross(no_obj); + return; } void TempTable::Union(TempTable *t, [[maybe_unused]] int all, ResultSender *sender, int64_t &g_offset, @@ -1683,13 +1728,17 @@ types::TianmuValueObject TempTable::GetValueObject(int64_t obj, uint attr) { uint TempTable::CalculatePageSize(int64_t _no_obj) { int64_t new_no_obj = _no_obj == -1 ? 
no_obj : _no_obj; uint size_of_one_record = 0; - for (uint i = 0; i < attrs.size(); i++) - if (attrs[i]->TypeName() == common::ColumnType::BIN || attrs[i]->TypeName() == common::ColumnType::BYTE || - attrs[i]->TypeName() == common::ColumnType::VARBYTE || attrs[i]->TypeName() == common::ColumnType::LONGTEXT || - attrs[i]->TypeName() == common::ColumnType::STRING || attrs[i]->TypeName() == common::ColumnType::VARCHAR) - size_of_one_record += attrs[i]->Type().GetInternalSize() + 4; // 4 bytes describing length + + // for (uint i = 0; i < attrs.size(); i++) { + for (auto attr : attrs) { + if (attr->TypeName() == common::ColumnType::BIN || attr->TypeName() == common::ColumnType::BYTE || + attr->TypeName() == common::ColumnType::VARBYTE || attr->TypeName() == common::ColumnType::LONGTEXT || + attr->TypeName() == common::ColumnType::STRING || attr->TypeName() == common::ColumnType::VARCHAR) + size_of_one_record += attr->Type().GetInternalSize() + 4; // 4 bytes describing length else - size_of_one_record += attrs[i]->Type().GetInternalSize(); + size_of_one_record += attr->Type().GetInternalSize(); + } + uint raw_size = (uint)new_no_obj; if (size_of_one_record < 1) size_of_one_record = 1; @@ -1889,11 +1938,12 @@ std::shared_ptr TempTable::Create(const TempTable &t, bool in_subq) { return tnew; } -std::shared_ptr TempTable::Create(JustATable *const t, int alias, Query *q, bool for_subq) { +std::shared_ptr TempTable::Create(JustATable *const t, int alias, Query *q, TableSubType sub_type, + bool for_subq) { if (for_subq) return std::shared_ptr(new TempTableForSubquery(t, alias, q)); else - return std::shared_ptr(new TempTable(t, alias, q)); + return std::shared_ptr(new TempTable(t, alias, q, sub_type)); } ColumnType TempTable::GetUnionType(ColumnType type1, ColumnType type2) { @@ -1948,6 +1998,7 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { sender->SetAffectRows(no_obj); CreateDisplayableAttrP(); CalculatePageSize(); + int64_t offset = 0; // 
controls the first object to be materialized int64_t limit = -1; // if(limit>=0) -> for(row = offset; row < offset + // limit; row++) .... @@ -1962,6 +2013,7 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { mode.param2 = -1; mode.param1 = 0; } + int64_t local_offset = 0; // controls the first object to be materialized in a given algorithm int64_t local_limit = -1; if (materialized && (order_by.size() > 0 || limits_present) && no_obj) { @@ -1974,19 +2026,21 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { local_limit = local_limit < 0 ? 0 : local_limit; } else local_limit = no_obj; + if (exists_only) { - if (local_limit == 0) // else no change needed - no_obj = 0; + no_obj = (local_limit == 0) ? 0 : no_obj; return; } if (order_by.size() != 0 && no_obj > 1) { std::shared_ptr temporary_source_table = CreateMaterializedCopy(true, in_subq); // true: translate definition of ordering + OrderByAndMaterialize(order_by, local_limit, local_offset); DeleteMaterializedCopy(temporary_source_table); } else if (limits_present) ApplyOffset(local_limit, local_offset); + order_by.clear(); return; } @@ -1997,9 +2051,11 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { bool table_distinct = this->mode.distinct; bool distinct_on_materialized = false; - for (uint i = 0; i < NumOfAttrs(); i++) + for (uint i = 0; i < NumOfAttrs(); i++) { if (attrs[i]->mode != common::ColOperation::LISTING) group_by = true; + } + if (table_distinct && group_by) { distinct_on_materialized = true; table_distinct = false; @@ -2011,54 +2067,57 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { bool no_rows_too_large = filter.mind_->TooManyTuples(); no_obj = -1; // no_obj not calculated yet - wait for better moment - VerifyAttrsSizes(); // resize attr[i] buffers basing on the current - // multiindex state + VerifyAttrsSizes(); // resize attr[i] buffers basing on the current multiindex state - 
// the case when there is no grouping of attributes, check also DISTINCT - // modifier of TT + // the case when there is no grouping of attributes, check also DISTINCT modifier of TT if (!group_by && !table_distinct) { - DEBUG_ASSERT(!distinct_on_materialized); // should by false here, otherwise must be - // added to conditions below + DEBUG_ASSERT(!distinct_on_materialized); // should by false here, otherwise must be added to conditions below if (limits_present) { - if (no_rows_too_large && order_by.size() == 0) - no_obj = offset + limit; // offset + limit in the worst case + // offset + limit in the worst case : or normal case. + if (sub_type == TableSubType::CONST || sub_type == TableSubType::DUAL) + no_obj = 1; else - no_obj = filter.mind_->NumOfTuples(); + no_obj = (no_rows_too_large && order_by.size() == 0) ? (offset + limit) : filter.mind_->NumOfTuples(); + if (no_obj <= offset) { no_obj = 0; materialized = true; order_by.clear(); return; } + local_offset = offset; local_limit = std::min(limit, (int64_t)no_obj - offset); local_limit = local_limit < 0 ? 
0 : local_limit; } else { - no_obj = filter.mind_->NumOfTuples(); + if (sub_type == TableSubType::CONST || sub_type == TableSubType::DUAL) + no_obj = 1; + else { + no_obj = filter.mind_->NumOfTuples(); + } local_limit = no_obj; } + if (exists_only) { order_by.clear(); return; } + output_mind.Clear(); output_mind.AddDimension_cross(local_limit); // an artificial dimension for result CalculatePageSize(); // recalculate, as no_obj might changed - // perform order by: in this case it can be done on source tables, not on - // the result + // perform order by: in this case it can be done on source tables, not on the result bool materialized_by_ordering = false; - if (CanOrderSources()) - // false if no sorting used + if (CanOrderSources()) // false if no sorting used materialized_by_ordering = this->OrderByAndMaterialize(order_by, local_limit, local_offset, sender); + if (!materialized_by_ordering) { // not materialized yet? // materialize without aggregations. If ordering then do not send result - if (order_by.size() == 0) - FillMaterializedBuffers(local_limit, local_offset, sender, lazy); - else // in case of order by we need to materialize all rows to be next - // ordered - FillMaterializedBuffers(no_obj, 0, nullptr, lazy); + // or in case of order by we need to materialize all rows to be next ordered + (order_by.size() == 0) ? FillMaterializedBuffers(local_limit, local_offset, sender, lazy) + : FillMaterializedBuffers(no_obj, 0, nullptr, lazy); } } else { // GROUP BY or DISTINCT - compute aggregations @@ -2068,13 +2127,14 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { if (exists_only) local_limit = 1; } + if (HasHavingConditions() && in_subq) having_conds[0].tree->Simplify(true); ResultSender *local_sender = (distinct_on_materialized || order_by.size() > 0 ? 
nullptr : sender); AggregationAlgorithm aggr(this); - aggr.Aggregate(table_distinct, local_limit, local_offset, - local_sender); // this->tree (HAVING) used inside + aggr.Aggregate(table_distinct, local_limit, local_offset, local_sender); // this->tree (HAVING) used inside + if (no_obj == 0) { order_by.clear(); return; @@ -2101,18 +2161,20 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { } } else local_limit = no_obj; + if (exists_only) local_limit = 1; + std::shared_ptr temporary_source_table = CreateMaterializedCopy(false, in_subq); ResultSender *local_sender = (order_by.size() > 0 ? nullptr : sender); AggregationAlgorithm aggr(this); - aggr.Aggregate(true, local_limit, local_offset, - local_sender); // true => select-level distinct + aggr.Aggregate(true, local_limit, local_offset, local_sender); // true => select-level distinct DeleteMaterializedCopy(temporary_source_table); output_mind.Clear(); output_mind.AddDimension_cross(no_obj); // an artificial dimension for result - } // end of distinct part + } + // ORDER BY, if not sorted until now if (order_by.size() != 0) { if (limits_present) { @@ -2127,17 +2189,21 @@ void TempTable::Materialize(bool in_subq, ResultSender *sender, bool lazy) { } } else local_limit = no_obj; + if (no_obj > 1 && !exists_only) { - std::shared_ptr temporary_source_table = - CreateMaterializedCopy(true, in_subq); // true: translate definition of ordering + // true: translate definition of ordering + std::shared_ptr temporary_source_table = CreateMaterializedCopy(true, in_subq); OrderByAndMaterialize(order_by, local_limit, local_offset, sender); DeleteMaterializedCopy(temporary_source_table); } + order_by.clear(); output_mind.Clear(); output_mind.AddDimension_cross(no_obj); // an artificial dimension for result } + materialized = true; + return; } // here we deal with both signed/unsigned, the exact values will be converted on send results phase. 
diff --git a/storage/tianmu/core/temp_table.h b/storage/tianmu/core/temp_table.h index 3d195b8fb..91112e256 100644 --- a/storage/tianmu/core/temp_table.h +++ b/storage/tianmu/core/temp_table.h @@ -187,7 +187,7 @@ class TempTable : public JustATable { protected: TempTable(const TempTable &, bool is_vc_owner); - TempTable(JustATable *const, int alias, Query *q); + TempTable(JustATable *const, int alias, Query *q, TableSubType subtype = TableSubType::NORMAL); std::shared_ptr CreateMaterializedCopy(bool translate_order, bool in_subq); // move all buffers to a newly created @@ -253,6 +253,7 @@ class TempTable : public JustATable { no_materialized = n; } TType TableType() const override { return TType::TEMP_TABLE; } // type of JustATable - TempTable + TableSubType getSubType() { return sub_type; } // the sub type of TempTable uint NumOfAttrs() const override { return (uint)attrs.size(); } uint NumOfDisplaybleAttrs() const override { return no_cols; } // no. of columns with defined alias bool IsDisplayAttr(int i) { return attrs[i]->alias != nullptr; } @@ -393,9 +394,10 @@ class TempTable : public JustATable { bool CanCondPushDown() { return can_cond_push_down; }; protected: - int64_t no_obj; + int64_t no_obj; // no. of objs.(or rows.) uint32_t p_power; // pack power uint no_cols; // no. of output columns, i.e., with defined alias + TableSubType sub_type; // table sub type. 
TableMode mode; // based on { TM_DISTINCT, TM_TOP, TM_EXISTS } std::vector attrs; // vector of output columns, each column contains // a buffer with values @@ -456,7 +458,8 @@ class TempTable : public JustATable { void Display(std::ostream &out = std::cout); // output to console static std::shared_ptr Create(const TempTable &, bool in_subq); - static std::shared_ptr Create(JustATable *const, int alias, Query *q, bool for_subquery = false); + static std::shared_ptr Create(JustATable *const, int alias, Query *q, TableSubType sub_type, + bool for_subquery = false); bool IsSent() { return is_sent; } void SetIsSent() { is_sent = true; } common::Tribool RoughIsEmpty() { return rough_is_empty; } diff --git a/storage/tianmu/core/temp_table_com.cpp b/storage/tianmu/core/temp_table_com.cpp index 07b0baa20..ed10fd311 100644 --- a/storage/tianmu/core/temp_table_com.cpp +++ b/storage/tianmu/core/temp_table_com.cpp @@ -28,8 +28,12 @@ namespace Tianmu { namespace core { -TempTable::TempTable(JustATable *t, int alias, Query *q) - : mem_scale(-1), filter(t->Getpackpower()), output_mind(t->Getpackpower()), m_conn(current_txn_) { +TempTable::TempTable(JustATable *t, int alias, Query *q, TableSubType subtype) + : mem_scale(-1), + sub_type(subtype), + filter(t->Getpackpower()), + output_mind(t->Getpackpower()), + m_conn(current_txn_) { p_power = t->Getpackpower(); filter.table_ = this; tables.push_back(t); @@ -43,16 +47,16 @@ TempTable::TempTable(JustATable *t, int alias, Query *q) else ((TempTable *)t)->Materialize(false, nullptr, false); - filter.mind_->AddDimension_cross(t->NumOfObj()); + uint64_t tuple_nums = (subtype == TableSubType::CONST || subtype == TableSubType::DUAL) ? 
1 : t->NumOfObj(); + filter.mind_->AddDimension_cross(tuple_nums); } else { filter.mind_->AddDimension_cross(t->NumOfObj()); } if (filter.mind_->TooManyTuples()) - no_obj = common::NULL_VALUE_64; // a big, improper number, which we hope to - // be changed after conditions are applied + no_obj = common::NULL_VALUE_64; // a big, improper number, which we hope to be changed after conditions are applied else - no_obj = filter.mind_->NumOfTuples(); + no_obj = (sub_type == TableSubType::DUAL || sub_type == TableSubType::CONST) ? 1 : filter.mind_->NumOfTuples(); no_cols = 0; no_global_virt_cols = 0; diff --git a/storage/tianmu/core/temp_table_low.cpp b/storage/tianmu/core/temp_table_low.cpp index a6af0a723..5319e051d 100644 --- a/storage/tianmu/core/temp_table_low.cpp +++ b/storage/tianmu/core/temp_table_low.cpp @@ -393,7 +393,9 @@ void TempTable::FillMaterializedBuffers(int64_t local_limit, int64_t local_offse if (page_end > no_obj + local_offset) page_end = no_obj + local_offset; - for (uint i = 0; i < NumOfAttrs(); i++) attrs[i]->CreateBuffer(page_end - start_row, m_conn, pagewise); + for (uint i = 0; i < NumOfAttrs(); i++) { + attrs[i]->CreateBuffer(page_end - start_row, m_conn, pagewise); + } auto &attr = attrs[0]; if (attr->NeedFill()) { @@ -528,34 +530,35 @@ std::vector TempTable::GetATIs(bool orig) { } #define STRING_LENGTH_THRESHOLD 512 -void TempTable::VerifyAttrsSizes() // verifies attr[i].field_size basing on the - // current multiindex contents +void TempTable::VerifyAttrsSizes() // verifies attr[i].field_size basing on the current multiindex contents { - for (uint i = 0; i < attrs.size(); i++) - if (ATI::IsStringType(attrs[i]->TypeName())) { - // reduce string size when column defined too large to reduce allocated - // temp memory - if (attrs[i]->term.vc->MaxStringSize() < STRING_LENGTH_THRESHOLD) { - attrs[i]->OverrideStringSize(attrs[i]->term.vc->MaxStringSize()); - } else { - vcolumn::VirtualColumn *vc = attrs[i]->term.vc; - int max_length = 
attrs[i]->term.vc->MaxStringSize(); - if (dynamic_cast(vc)) { - auto &var_map = dynamic_cast(vc)->GetVarMap(); - for (auto &it : var_map) { - PhysicalColumn *column = it.GetTabPtr()->GetColumn(it.col_ndx); - ColumnType ct = column->Type(); - uint precision = ct.GetPrecision(); - if (precision >= STRING_LENGTH_THRESHOLD) { - uint actual_size = column->MaxStringSize() * ct.GetCollation().collation->mbmaxlen; - if (actual_size < precision) - max_length += (actual_size - precision); - } + for (uint i = 0; i < attrs.size(); i++) { + if (!ATI::IsStringType(attrs[i]->TypeName())) // and 'IsTxtType' or 'IsCharType' ? + continue; + + // reduce string size when column defined too large to reduce allocated + // temp memory + if (attrs[i]->term.vc->MaxStringSize() < STRING_LENGTH_THRESHOLD) { + attrs[i]->OverrideStringSize(attrs[i]->term.vc->MaxStringSize()); + } else { + vcolumn::VirtualColumn *vc = attrs[i]->term.vc; + int max_length = attrs[i]->term.vc->MaxStringSize(); + if (dynamic_cast(vc)) { + auto &var_map = dynamic_cast(vc)->GetVarMap(); + for (auto &it : var_map) { + PhysicalColumn *column = it.GetTabPtr()->GetColumn(it.col_ndx); + ColumnType ct = column->Type(); + uint precision = ct.GetPrecision(); + if (precision >= STRING_LENGTH_THRESHOLD) { + uint actual_size = column->MaxStringSize() * ct.GetCollation().collation->mbmaxlen; + if (actual_size < precision) + max_length += (actual_size - precision); } } - attrs[i]->OverrideStringSize(max_length); } + attrs[i]->OverrideStringSize(max_length); } + } } void TempTable::FillbufferTask(Attr *attr, Transaction *txn, MIIterator *page_start, int64_t start_row, diff --git a/storage/tianmu/handler/ha_tianmu.cpp b/storage/tianmu/handler/ha_tianmu.cpp index 8e2c78a83..02ad61646 100644 --- a/storage/tianmu/handler/ha_tianmu.cpp +++ b/storage/tianmu/handler/ha_tianmu.cpp @@ -1550,7 +1550,7 @@ const Item *ha_tianmu::cond_push(const Item *a_cond) { query_->AddTable(rctp); core::TabID t_out; cq_->TableAlias(t_out, core::TabID(0)); 
// we apply it to the only table in this query - cq_->TmpTable(tmp_table_, t_out); + cq_->TmpTable(tmp_table_, t_out, core::TableSubType::NORMAL); std::string ext_alias; if (table->pos_in_table_list->referencing_view) diff --git a/storage/tianmu/index/multi_index.cpp b/storage/tianmu/index/multi_index.cpp index 586cf60b6..645939c03 100644 --- a/storage/tianmu/index/multi_index.cpp +++ b/storage/tianmu/index/multi_index.cpp @@ -1,483 +1,510 @@ -/* Copyright (c) 2022 StoneAtom, Inc. All rights reserved. - Use is subject to license terms - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA -*/ - -#include "multi_index.h" - -#include "index/mi_new_contents.h" -#include "optimizer/group_distinct_table.h" -#include "optimizer/iterators/mi_iterator.h" -#include "optimizer/joiner.h" -#include "util/tools.h" - -namespace Tianmu { -namespace core { -#define MATERIAL_TUPLES_LIMIT 150000000000LL // = ~1 TB of cache needed for one dimension -#define MATERIAL_TUPLES_WARNING 2500000000LL // = 10-20 GB of cache for one dimension - -MultiIndex::MultiIndex(uint32_t power) : m_conn(current_txn_) { - // update Clear() on any change - p_power = power; - no_dimensions = 0; - no_tuples = 0; - no_tuples_too_big = false; - dim_size = nullptr; - group_for_dim = nullptr; - group_num_for_dim = nullptr; - iterator_lock = 0; - shallow_dim_groups = false; -} - -MultiIndex::MultiIndex(const MultiIndex &s) 
: m_conn(s.m_conn) { - p_power = s.p_power; - no_dimensions = s.no_dimensions; - no_tuples = s.no_tuples; - no_tuples_too_big = s.no_tuples_too_big; - if (no_dimensions > 0) { - dim_size = new int64_t[no_dimensions]; - group_for_dim = new DimensionGroup *[no_dimensions]; - group_num_for_dim = new int[no_dimensions]; - used_in_output = s.used_in_output; - can_be_distinct = s.can_be_distinct; - for (uint i = 0; i < s.dim_groups.size(); i++) dim_groups.push_back(s.dim_groups[i]->Clone(false)); - - for (int i = 0; i < no_dimensions; i++) dim_size[i] = s.dim_size[i]; - - FillGroupForDim(); - } else { - dim_size = nullptr; - group_for_dim = nullptr; - group_num_for_dim = nullptr; - } - iterator_lock = 0; - shallow_dim_groups = false; -} - -MultiIndex::MultiIndex(MultiIndex &s, bool shallow) : m_conn(s.m_conn) { - p_power = s.p_power; - no_dimensions = s.no_dimensions; - no_tuples = s.no_tuples; - no_tuples_too_big = s.no_tuples_too_big; - if (no_dimensions > 0) { - group_for_dim = new DimensionGroup *[no_dimensions]; - group_num_for_dim = new int[no_dimensions]; - dim_size = new int64_t[no_dimensions]; - used_in_output = s.used_in_output; - can_be_distinct = s.can_be_distinct; - for (uint i = 0; i < s.dim_groups.size(); i++) dim_groups.push_back(s.dim_groups[i]->Clone(shallow)); - - for (int i = 0; i < no_dimensions; i++) dim_size[i] = s.dim_size[i]; - - FillGroupForDim(); - } else { - dim_size = nullptr; - group_for_dim = nullptr; - group_num_for_dim = nullptr; - } - iterator_lock = 0; - shallow_dim_groups = shallow; -} - -MultiIndex::~MultiIndex() { - if (!shallow_dim_groups) { - for (uint i = 0; i < dim_groups.size(); i++) { - delete dim_groups[i]; - dim_groups[i] = nullptr; - } - } - delete[] dim_size; - delete[] group_for_dim; - delete[] group_num_for_dim; -} - -void MultiIndex::Clear() { - try { - for (uint i = 0; i < dim_groups.size(); i++) { - delete dim_groups[i]; - dim_groups[i] = nullptr; - } - } catch (...) 
{ - DEBUG_ASSERT(!"exception from destructor"); - } - delete[] dim_size; - delete[] group_for_dim; - delete[] group_num_for_dim; - dim_groups.clear(); - no_dimensions = 0; - no_tuples = 0; - no_tuples_too_big = false; - dim_size = nullptr; - group_for_dim = nullptr; - group_num_for_dim = nullptr; - iterator_lock = 0; - can_be_distinct.clear(); - used_in_output.clear(); -} - -void MultiIndex::FillGroupForDim() { - MEASURE_FET("MultiIndex::FillGroupForDim(...)"); - int move_groups = 0; - for (uint i = 0; i < dim_groups.size(); i++) { // pack all holes - if (dim_groups[i] == nullptr) { - while (i + move_groups < dim_groups.size() && dim_groups[i + move_groups] == nullptr) move_groups++; - if (i + move_groups < dim_groups.size()) { - dim_groups[i] = dim_groups[i + move_groups]; - dim_groups[i + move_groups] = nullptr; - } else - break; - } - } - for (int i = 0; i < move_groups; i++) dim_groups.pop_back(); // clear nulls from the end - - for (int d = 0; d < no_dimensions; d++) { - group_for_dim[d] = nullptr; - group_num_for_dim[d] = -1; - } - - for (uint i = 0; i < dim_groups.size(); i++) { - for (int d = 0; d < no_dimensions; d++) - if (dim_groups[i]->DimUsed(d)) { - group_for_dim[d] = dim_groups[i]; - group_num_for_dim[d] = i; - } - } -} - -void MultiIndex::Empty(int dim_to_make_empty) { - if (dim_to_make_empty != -1) - group_for_dim[dim_to_make_empty]->Empty(); - else { - for (uint i = 0; i < dim_groups.size(); i++) dim_groups[i]->Empty(); - } - for (int i = 0; i < no_dimensions; i++) { - if (dim_to_make_empty == -1 || group_for_dim[dim_to_make_empty]->DimUsed(i)) { - can_be_distinct[i] = true; - used_in_output[i] = true; - } - } - no_tuples = 0; - no_tuples_too_big = false; -} - -void MultiIndex::AddDimension() { - no_dimensions++; - int64_t *ns = new int64_t[no_dimensions]; - DimensionGroup **ng = new DimensionGroup *[no_dimensions]; - int *ngn = new int[no_dimensions]; - for (int i = 0; i < no_dimensions - 1; i++) { - ns[i] = dim_size[i]; - ng[i] = 
group_for_dim[i]; - ngn[i] = group_num_for_dim[i]; - } - delete[] dim_size; - delete[] group_for_dim; - delete[] group_num_for_dim; - dim_size = ns; - group_for_dim = ng; - group_num_for_dim = ngn; - dim_size[no_dimensions - 1] = 0; - group_for_dim[no_dimensions - 1] = nullptr; - group_num_for_dim[no_dimensions - 1] = -1; - // Temporary code, for rare cases when we add a dimension after other joins - // (smk_33): - for (uint i = 0; i < dim_groups.size(); i++) dim_groups[i]->AddDimension(); - - return; // Note: other functions will use AddDimension() to enlarge tables -} - -void MultiIndex::AddDimension_cross(uint64_t size) { - AddDimension(); - int new_dim = no_dimensions - 1; - used_in_output.push_back(true); - if (no_dimensions > 1) - MultiplyNoTuples(size); - else - no_tuples = size; - DimensionGroupFilter *nf = nullptr; - if (size > 0) { - dim_size[new_dim] = size; - nf = new DimensionGroupFilter(new_dim, size, p_power); // redo - } else { // size == 0 => prepare a dummy dimension with empty filter - dim_size[new_dim] = 1; - nf = new DimensionGroupFilter(new_dim, dim_size[new_dim], p_power); // redo - nf->Empty(); - } - dim_groups.push_back(nf); - group_for_dim[new_dim] = nf; - group_num_for_dim[new_dim] = int(dim_groups.size() - 1); - can_be_distinct.push_back(true); // may be modified below - CheckIfVirtualCanBeDistinct(); -} - -void MultiIndex::MultiplyNoTuples(uint64_t factor) { - no_tuples = SafeMultiplication(no_tuples, factor); - if (no_tuples == static_cast(common::NULL_VALUE_64)) - no_tuples_too_big = true; -} - -void MultiIndex::CheckIfVirtualCanBeDistinct() // updates can_be_distinct table - // in case of virtual multiindex -{ - // check whether can_be_distinct can be true - // it is possible only when there are only 1-object dimensions - // and one multiobject (then the multiobject one can be distinct, the rest - // cannot) - if (no_dimensions > 1) { - int non_one_found = 0; - for (int i = 0; i < no_dimensions; i++) { - if (dim_size[i] > 1) - 
non_one_found++; - } - if (non_one_found == 1) { - for (int j = 0; j < no_dimensions; j++) - if (dim_size[j] == 1) - can_be_distinct[j] = false; - else - can_be_distinct[j] = true; - } - if (non_one_found > 1) - for (int j = 0; j < no_dimensions; j++) can_be_distinct[j] = false; - } -} - -void MultiIndex::LockForGetIndex(int dim) { - if (shallow_dim_groups) { - return; - } - group_for_dim[dim]->Lock(dim); -} - -void MultiIndex::UnlockFromGetIndex(int dim) { - if (shallow_dim_groups) { - return; - } - group_for_dim[dim]->Unlock(dim); -} - -uint64_t MultiIndex::DimSize(int dim) // the size of one dimension: material_no_tuples for materialized, - // NumOfOnes for virtual -{ - return group_for_dim[dim]->NumOfTuples(); -} - -void MultiIndex::LockAllForUse() { - if (shallow_dim_groups) { - return; - } - for (int dim = 0; dim < no_dimensions; dim++) LockForGetIndex(dim); -} - -void MultiIndex::UnlockAllFromUse() { - if (shallow_dim_groups) { - return; - } - for (int dim = 0; dim < no_dimensions; dim++) UnlockFromGetIndex(dim); -} - -void MultiIndex::MakeCountOnly(int64_t mat_tuples, DimensionVector &dims_to_materialize) { - MEASURE_FET("MultiIndex::MakeCountOnly(...)"); - MarkInvolvedDimGroups(dims_to_materialize); - for (int i = 0; i < NumOfDimensions(); i++) { - if (dims_to_materialize[i] && group_for_dim[i] != nullptr) { - // delete this group - int dim_group_to_delete = group_num_for_dim[i]; - for (int j = i; j < NumOfDimensions(); j++) - if (group_num_for_dim[j] == dim_group_to_delete) { - group_for_dim[j] = nullptr; - group_num_for_dim[j] = -1; - } - delete dim_groups[dim_group_to_delete]; - dim_groups[dim_group_to_delete] = nullptr; // these holes will be packed in FillGroupForDim() - } - } - DimensionGroupMaterialized *count_only_group = new DimensionGroupMaterialized(dims_to_materialize); - count_only_group->SetNumOfObj(mat_tuples); - dim_groups.push_back(count_only_group); - FillGroupForDim(); - UpdateNumOfTuples(); -} - -void MultiIndex::UpdateNumOfTuples() { - 
// assumptions: - // - no_material_tuples is correct, even if all t[...] are nullptr (forgotten). - // However, if all f[...] are not nullptr, then the index is set to IT_VIRTUAL - // and no_material_tuples = 0 - // - IT_MIXED or IT_VIRTUAL may be in fact IT_ORDERED (must be set properly on - // output) - // - if no_material_tuples > 0, then new index_type is not IT_VIRTUAL - no_tuples_too_big = false; - if (dim_groups.size() == 0) - no_tuples = 0; - else { - no_tuples = 1; - for (uint i = 0; i < dim_groups.size(); i++) { - dim_groups[i]->UpdateNumOfTuples(); - MultiplyNoTuples(dim_groups[i]->NumOfTuples()); - } - } -} - -int64_t MultiIndex::NumOfTuples(DimensionVector &dimensions, - bool fail_on_overflow) // for a given subset of dimensions -{ - std::vector dg = ListInvolvedDimGroups(dimensions); - if (dg.size() == 0) - return 0; - int64_t res = 1; - for (uint i = 0; i < dg.size(); i++) { - dim_groups[dg[i]]->UpdateNumOfTuples(); - res = SafeMultiplication(res, dim_groups[dg[i]]->NumOfTuples()); - } - if (res == common::NULL_VALUE_64 && fail_on_overflow) - throw common::OutOfMemoryException("Too many tuples. (1428)"); - return res; -} - -int MultiIndex::MaxNumOfPacks(int dim) // maximal (upper approx.) 
number of different nonempty data - // packs for the given dimension -{ - int max_packs = 0; - Filter *f = group_for_dim[dim]->GetFilter(dim); - if (f) { - for (size_t p = 0; p < f->NumOfBlocks(); p++) - if (!f->IsEmpty(p)) - max_packs++; - } else { - max_packs = int((dim_size[dim]) >> p_power) + 1; - if (group_for_dim[dim]->NumOfTuples() < max_packs) - max_packs = (int)group_for_dim[dim]->NumOfTuples(); - } - return max_packs; -} - -// Logical operations - -void MultiIndex::MIFilterAnd(MIIterator &mit, - Filter &fd) // limit the MultiIndex by excluding - // all tuples which are not present in - // fd, in order given by mit -{ - MEASURE_FET("MultiIndex::MIFilterAnd(...)"); - LockAllForUse(); - if (no_dimensions == 1 && group_for_dim[0]->GetFilter(0)) { - Filter *f = group_for_dim[0]->GetFilter(0); - FilterOnesIterator fit(f, p_power); - int64_t cur_pos = 0; - while (fit.IsValid()) { - if (!fd.Get(cur_pos)) - fit.ResetDelayed(); - ++fit; - cur_pos++; - } - f->Commit(); - UpdateNumOfTuples(); - UnlockAllFromUse(); - return; - } - - DimensionVector dim_used(mit.DimsUsed()); - MarkInvolvedDimGroups(dim_used); - int64_t new_no_tuples = fd.NumOfOnes(); - JoinTips tips(*this); - MINewContents new_mind(this, tips); - new_mind.SetDimensions(dim_used); - new_mind.Init(new_no_tuples); - mit.Rewind(); - int64_t f_pos = 0; - while (mit.IsValid()) { - if (fd.Get(f_pos)) { - for (int d = 0; d < no_dimensions; d++) - if (dim_used[d]) - new_mind.SetNewTableValue(d, mit[d]); - new_mind.CommitNewTableValues(); - } - ++mit; - f_pos++; - } - new_mind.Commit(new_no_tuples); - UpdateNumOfTuples(); - UnlockAllFromUse(); -} - -bool MultiIndex::MarkInvolvedDimGroups( - DimensionVector &v) // if any dimension is marked, then mark the rest of its group -{ - bool added = false; - for (int i = 0; i < no_dimensions; i++) { - if (!v[i]) { - for (int j = 0; j < no_dimensions; j++) { - if (v[j] && group_num_for_dim[i] == group_num_for_dim[j]) { - v[i] = true; - added = true; - break; - } - } - } - 
} - return added; -} - -std::vector MultiIndex::ListInvolvedDimGroups(DimensionVector &v) // List all internal numbers of groups touched - // by the set of dimensions -{ - std::vector res; - int cur_group_number; - for (int i = 0; i < no_dimensions; i++) { - if (v[i]) { - cur_group_number = group_num_for_dim[i]; - bool added = false; - for (uint j = 0; j < res.size(); j++) - if (res[j] == cur_group_number) { - added = true; - break; - } - if (!added) - res.push_back(cur_group_number); - } - } - return res; -} - -std::string MultiIndex::Display() { - std::vector ind_tab_no; - int it_count = 0; - for (uint i = 0; i < dim_groups.size(); i++) { - if (dim_groups[i]->Type() == DimensionGroup::DGType::DG_INDEX_TABLE) { - it_count++; - ind_tab_no.push_back(it_count); // calculate a number of a materialized dim. group - } else - ind_tab_no.push_back(0); - } - - std::string s; - s = "["; - for (int i = 0; i < no_dimensions; i++) { - if (!group_for_dim[i]->DimEnabled(i)) - s += "-"; - else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_FILTER) - s += "f"; - else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_INDEX_TABLE) - s += (ind_tab_no[group_num_for_dim[i]] > 9 ? 'T' : '0' + ind_tab_no[group_num_for_dim[i]]); - else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_VIRTUAL) - s += (GetFilter(i) ? 'F' : 'v'); - else - s += "?"; - } - s += "]"; - return s; -} -} // namespace core -} // namespace Tianmu +/* Copyright (c) 2022 StoneAtom, Inc. All rights reserved. + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "multi_index.h" + +#include "index/mi_new_contents.h" +#include "optimizer/group_distinct_table.h" +#include "optimizer/iterators/mi_iterator.h" +#include "optimizer/joiner.h" +#include "util/tools.h" + +namespace Tianmu { +namespace core { + +#define MATERIAL_TUPLES_LIMIT 150000000000LL // = ~1 TB of cache needed for one dimension +#define MATERIAL_TUPLES_WARNING 2500000000LL // = 10-20 GB of cache for one dimension + +MultiIndex::MultiIndex(uint32_t power) : m_conn(current_txn_) { + // update Clear() on any change + p_power = power; + no_dimensions = 0; + no_tuples = 0; + no_tuples_too_big = false; + dim_size = nullptr; + group_for_dim = nullptr; + group_num_for_dim = nullptr; + iterator_lock = 0; + shallow_dim_groups = false; +} + +MultiIndex::MultiIndex(const MultiIndex &s) : m_conn(s.m_conn) { + p_power = s.p_power; + no_dimensions = s.no_dimensions; + no_tuples = s.no_tuples; + no_tuples_too_big = s.no_tuples_too_big; + + if (no_dimensions > 0) { + dim_size = new int64_t[no_dimensions]; + group_for_dim = new DimensionGroup *[no_dimensions]; + group_num_for_dim = new int[no_dimensions]; + used_in_output = s.used_in_output; + can_be_distinct = s.can_be_distinct; + + for (uint i = 0; i < s.dim_groups.size(); i++) dim_groups.push_back(s.dim_groups[i]->Clone(false)); + + for (int i = 0; i < no_dimensions; i++) dim_size[i] = s.dim_size[i]; + + FillGroupForDim(); + } else { + dim_size = nullptr; + group_for_dim = nullptr; + group_num_for_dim = nullptr; + } + + iterator_lock = 0; + shallow_dim_groups = false; +} + +MultiIndex::MultiIndex(MultiIndex &s, bool shallow) : m_conn(s.m_conn) { + p_power = s.p_power; + no_dimensions = s.no_dimensions; + no_tuples = s.no_tuples; + no_tuples_too_big = s.no_tuples_too_big; + + if (no_dimensions > 0) { + 
group_for_dim = new DimensionGroup *[no_dimensions]; + group_num_for_dim = new int[no_dimensions]; + dim_size = new int64_t[no_dimensions]; + used_in_output = s.used_in_output; + can_be_distinct = s.can_be_distinct; + for (uint i = 0; i < s.dim_groups.size(); i++) dim_groups.push_back(s.dim_groups[i]->Clone(shallow)); + + for (int i = 0; i < no_dimensions; i++) dim_size[i] = s.dim_size[i]; + + FillGroupForDim(); + } else { + dim_size = nullptr; + group_for_dim = nullptr; + group_num_for_dim = nullptr; + } + + iterator_lock = 0; + shallow_dim_groups = shallow; +} + +MultiIndex::~MultiIndex() { + if (!shallow_dim_groups) { + for (uint i = 0; i < dim_groups.size(); i++) { + delete dim_groups[i]; + dim_groups[i] = nullptr; + } + } + + delete[] dim_size; + delete[] group_for_dim; + delete[] group_num_for_dim; +} + +void MultiIndex::Clear() { + try { + for (uint i = 0; i < dim_groups.size(); i++) { + delete dim_groups[i]; + dim_groups[i] = nullptr; + } + } catch (...) { + DEBUG_ASSERT(!"exception from destructor"); + } + + delete[] dim_size; + delete[] group_for_dim; + delete[] group_num_for_dim; + + dim_groups.clear(); + no_dimensions = 0; + no_tuples = 0; + no_tuples_too_big = false; + dim_size = nullptr; + group_for_dim = nullptr; + group_num_for_dim = nullptr; + iterator_lock = 0; + + can_be_distinct.clear(); + used_in_output.clear(); +} + +void MultiIndex::FillGroupForDim() { + MEASURE_FET("MultiIndex::FillGroupForDim(...)"); + int move_groups = 0; + for (uint i = 0; i < dim_groups.size(); i++) { // pack all holes + if (dim_groups[i] == nullptr) { + while (i + move_groups < dim_groups.size() && dim_groups[i + move_groups] == nullptr) move_groups++; + + if (i + move_groups < dim_groups.size()) { + dim_groups[i] = dim_groups[i + move_groups]; + dim_groups[i + move_groups] = nullptr; + } else + break; + } + } + + for (int i = 0; i < move_groups; i++) dim_groups.pop_back(); // clear nulls from the end + + for (int d = 0; d < no_dimensions; d++) { + group_for_dim[d] = 
nullptr; + group_num_for_dim[d] = -1; + } + + for (uint i = 0; i < dim_groups.size(); i++) { + for (int d = 0; d < no_dimensions; d++) + if (dim_groups[i]->DimUsed(d)) { + group_for_dim[d] = dim_groups[i]; + group_num_for_dim[d] = i; + } + } +} + +void MultiIndex::Empty(int dim_to_make_empty) { + if (dim_to_make_empty != -1) + group_for_dim[dim_to_make_empty]->Empty(); + else { + for (uint i = 0; i < dim_groups.size(); i++) dim_groups[i]->Empty(); + } + + for (int i = 0; i < no_dimensions; i++) { + if (dim_to_make_empty == -1 || group_for_dim[dim_to_make_empty]->DimUsed(i)) { + can_be_distinct[i] = true; + used_in_output[i] = true; + } + } + + no_tuples = 0; + no_tuples_too_big = false; +} + +void MultiIndex::AddDimension() { + no_dimensions++; + int64_t *ns = new int64_t[no_dimensions]; + DimensionGroup **ng = new DimensionGroup *[no_dimensions]; + int *ngn = new int[no_dimensions]; + + for (int i = 0; i < no_dimensions - 1; i++) { + ns[i] = dim_size[i]; + ng[i] = group_for_dim[i]; + ngn[i] = group_num_for_dim[i]; + } + + delete[] dim_size; + delete[] group_for_dim; + delete[] group_num_for_dim; + + dim_size = ns; + group_for_dim = ng; + group_num_for_dim = ngn; + dim_size[no_dimensions - 1] = 0; + group_for_dim[no_dimensions - 1] = nullptr; + group_num_for_dim[no_dimensions - 1] = -1; + // Temporary code, for rare cases when we add a dimension after other joins + for (uint i = 0; i < dim_groups.size(); i++) dim_groups[i]->AddDimension(); + + return; // Note: other functions will use AddDimension() to enlarge tables +} + +void MultiIndex::AddDimension_cross(uint64_t size) { + AddDimension(); + int new_dim = no_dimensions - 1; + used_in_output.push_back(true); + + if (no_dimensions > 1) + MultiplyNoTuples(size); + else + no_tuples = size; + + DimensionGroupFilter *nf = nullptr; + if (size > 0) { + dim_size[new_dim] = size; + nf = new DimensionGroupFilter(new_dim, size, p_power); // redo + } else { // size == 0 => prepare a dummy dimension with empty filter + 
dim_size[new_dim] = 1; + nf = new DimensionGroupFilter(new_dim, dim_size[new_dim], p_power); // redo + nf->Empty(); + } + + dim_groups.push_back(nf); + group_for_dim[new_dim] = nf; + group_num_for_dim[new_dim] = int(dim_groups.size() - 1); + can_be_distinct.push_back(true); // may be modified below + CheckIfVirtualCanBeDistinct(); +} + +void MultiIndex::MultiplyNoTuples(uint64_t factor) { + no_tuples = SafeMultiplication(no_tuples, factor); + if (no_tuples == static_cast(common::NULL_VALUE_64)) + no_tuples_too_big = true; +} + +void MultiIndex::CheckIfVirtualCanBeDistinct() // updates can_be_distinct table + // in case of virtual multiindex +{ + // check whether can_be_distinct can be true it is possible only when there are only 1-object dimensions + // and one multiobject (then the multiobject one can be distinct, the rest cannot) + if (no_dimensions > 1) { + int non_one_found = 0; + for (int i = 0; i < no_dimensions; i++) { + if (dim_size[i] > 1) + non_one_found++; + } + + if (non_one_found == 1) { + for (int j = 0; j < no_dimensions; j++) { + can_be_distinct[j] = (dim_size[j] == 1) ? 
false : true; + } + } + + if (non_one_found > 1) + for (int j = 0; j < no_dimensions; j++) can_be_distinct[j] = false; + } +} + +void MultiIndex::LockForGetIndex(int dim) { + if (shallow_dim_groups) { + return; + } + group_for_dim[dim]->Lock(dim); +} + +void MultiIndex::UnlockFromGetIndex(int dim) { + if (shallow_dim_groups) { + return; + } + group_for_dim[dim]->Unlock(dim); +} + +uint64_t MultiIndex::DimSize(int dim) // the size of one dimension: material_no_tuples for materialized, + // NumOfOnes for virtual +{ + return group_for_dim[dim]->NumOfTuples(); +} + +void MultiIndex::LockAllForUse() { + if (shallow_dim_groups) { + return; + } + for (int dim = 0; dim < no_dimensions; dim++) LockForGetIndex(dim); +} + +void MultiIndex::UnlockAllFromUse() { + if (shallow_dim_groups) { + return; + } + for (int dim = 0; dim < no_dimensions; dim++) UnlockFromGetIndex(dim); +} + +void MultiIndex::MakeCountOnly(int64_t mat_tuples, DimensionVector &dims_to_materialize) { + MEASURE_FET("MultiIndex::MakeCountOnly(...)"); + MarkInvolvedDimGroups(dims_to_materialize); + + for (int i = 0; i < NumOfDimensions(); i++) { + if (dims_to_materialize[i] && group_for_dim[i] != nullptr) { + // delete this group + int dim_group_to_delete = group_num_for_dim[i]; + for (int j = i; j < NumOfDimensions(); j++) { + if (group_num_for_dim[j] == dim_group_to_delete) { + group_for_dim[j] = nullptr; + group_num_for_dim[j] = -1; + } + } + + delete dim_groups[dim_group_to_delete]; + dim_groups[dim_group_to_delete] = nullptr; // these holes will be packed in FillGroupForDim() + } + } + + DimensionGroupMaterialized *count_only_group = new DimensionGroupMaterialized(dims_to_materialize); + count_only_group->SetNumOfObj(mat_tuples); + dim_groups.push_back(count_only_group); + + FillGroupForDim(); + UpdateNumOfTuples(); +} + +void MultiIndex::UpdateNumOfTuples() { + // assumptions: + // - no_material_tuples is correct, even if all t[...] are nullptr (forgotten). + // However, if all f[...] 
are not nullptr, then the index is set to IT_VIRTUAL + // and no_material_tuples = 0 + // - IT_MIXED or IT_VIRTUAL may be in fact IT_ORDERED (must be set properly on + // output) + // - if no_material_tuples > 0, then new index_type is not IT_VIRTUAL + no_tuples_too_big = false; + no_tuples = (dim_groups.size() == 0) ? 0 : 1; + + for (uint i = 0; i < dim_groups.size(); i++) { + dim_groups[i]->UpdateNumOfTuples(); + MultiplyNoTuples(dim_groups[i]->NumOfTuples()); + } +} + +int64_t MultiIndex::NumOfTuples(DimensionVector &dimensions, + bool fail_on_overflow) // for a given subset of dimensions +{ + std::vector dg = ListInvolvedDimGroups(dimensions); + if (dg.size() == 0) + return 0; + + int64_t res = 1; + for (uint i = 0; i < dg.size(); i++) { + dim_groups[dg[i]]->UpdateNumOfTuples(); + res = SafeMultiplication(res, dim_groups[dg[i]]->NumOfTuples()); + } + + if (res == common::NULL_VALUE_64 && fail_on_overflow) + throw common::OutOfMemoryException("Too many tuples. (1428)"); + + return res; +} + +int MultiIndex::MaxNumOfPacks(int dim) // maximal (upper approx.) 
number of different nonempty data + // packs for the given dimension +{ + int max_packs = 0; + Filter *f = group_for_dim[dim]->GetFilter(dim); + if (f) { + for (size_t p = 0; p < f->NumOfBlocks(); p++) + if (!f->IsEmpty(p)) + max_packs++; + } else { + max_packs = int((dim_size[dim]) >> p_power) + 1; + if (group_for_dim[dim]->NumOfTuples() < max_packs) + max_packs = (int)group_for_dim[dim]->NumOfTuples(); + } + return max_packs; +} + +// Logical operations + +void MultiIndex::MIFilterAnd(MIIterator &mit, + Filter &fd) // limit the MultiIndex by excluding + // all tuples which are not present in + // fd, in order given by mit +{ + MEASURE_FET("MultiIndex::MIFilterAnd(...)"); + LockAllForUse(); + if (no_dimensions == 1 && group_for_dim[0]->GetFilter(0)) { + Filter *f = group_for_dim[0]->GetFilter(0); + FilterOnesIterator fit(f, p_power); + int64_t cur_pos = 0; + while (fit.IsValid()) { + if (!fd.Get(cur_pos)) + fit.ResetDelayed(); + + ++fit; + cur_pos++; + } + + f->Commit(); + UpdateNumOfTuples(); + UnlockAllFromUse(); + return; + } + + DimensionVector dim_used(mit.DimsUsed()); + MarkInvolvedDimGroups(dim_used); + int64_t new_no_tuples = fd.NumOfOnes(); + JoinTips tips(*this); + MINewContents new_mind(this, tips); + new_mind.SetDimensions(dim_used); + new_mind.Init(new_no_tuples); + mit.Rewind(); + int64_t f_pos = 0; + + while (mit.IsValid()) { + if (fd.Get(f_pos)) { + for (int d = 0; d < no_dimensions; d++) + if (dim_used[d]) + new_mind.SetNewTableValue(d, mit[d]); + new_mind.CommitNewTableValues(); + } + ++mit; + f_pos++; + } + + new_mind.Commit(new_no_tuples); + UpdateNumOfTuples(); + UnlockAllFromUse(); +} + +bool MultiIndex::MarkInvolvedDimGroups( + DimensionVector &v) // if any dimension is marked, then mark the rest of its group +{ + bool added = false; + for (int i = 0; i < no_dimensions; i++) { + if (!v[i]) { + for (int j = 0; j < no_dimensions; j++) { + if (v[j] && group_num_for_dim[i] == group_num_for_dim[j]) { + v[i] = true; + added = true; + break; + } + 
} + } + } + return added; +} + +std::vector MultiIndex::ListInvolvedDimGroups(DimensionVector &v) // List all internal numbers of groups touched + // by the set of dimensions +{ + std::vector res; + int cur_group_number; + for (int i = 0; i < no_dimensions; i++) { + if (v[i]) { + cur_group_number = group_num_for_dim[i]; + bool added = false; + for (uint j = 0; j < res.size(); j++) + if (res[j] == cur_group_number) { + added = true; + break; + } + if (!added) + res.push_back(cur_group_number); + } + } + return res; +} + +std::string MultiIndex::Display() { + std::vector ind_tab_no; + int it_count = 0; + for (uint i = 0; i < dim_groups.size(); i++) { + if (dim_groups[i]->Type() == DimensionGroup::DGType::DG_INDEX_TABLE) { + it_count++; + ind_tab_no.push_back(it_count); // calculate a number of a materialized dim. group + } else + ind_tab_no.push_back(0); + } + + std::string s; + s = "["; + for (int i = 0; i < no_dimensions; i++) { + if (!group_for_dim[i]->DimEnabled(i)) + s += "-"; + else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_FILTER) + s += "f"; + else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_INDEX_TABLE) + s += (ind_tab_no[group_num_for_dim[i]] > 9 ? 'T' : '0' + ind_tab_no[group_num_for_dim[i]]); + else if (group_for_dim[i]->Type() == DimensionGroup::DGType::DG_VIRTUAL) + s += (GetFilter(i) ? 'F' : 'v'); + else + s += "?"; + } + s += "]"; + return s; +} + +} // namespace core +} // namespace Tianmu diff --git a/storage/tianmu/index/multi_index.h b/storage/tianmu/index/multi_index.h index db319fef2..5039acef9 100644 --- a/storage/tianmu/index/multi_index.h +++ b/storage/tianmu/index/multi_index.h @@ -1,191 +1,182 @@ -/* Copyright (c) 2022 StoneAtom, Inc. All rights reserved. - Use is subject to license terms - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA -*/ -#ifndef TIANMU_CORE_MULTI_INDEX_H_ -#define TIANMU_CORE_MULTI_INDEX_H_ -#pragma once - -#include "core/dimension_group.h" -#include "executor/filter.h" -#include "index/index_table.h" -#include "optimizer/compile/cq_term.h" -#include "util/bin_tools.h" - -namespace Tianmu { -namespace core { -class MIIterator; - -class MultiIndex { - public: - MultiIndex(uint32_t power); // = PACK_DEFAUL - MultiIndex(const MultiIndex &s); - MultiIndex(MultiIndex &s, bool shallow); - ~MultiIndex(); - - void Clear(); // clear the multiindex completely (like destructor + Multiindex()) - - // Construction of the index - - // max_value - upper limit of indexes of newly added objects (e.g. number of - // all objects in a table to be joined) - void AddDimension_cross(uint64_t size); // calculate a cross product of the previous value of - // index and the full table (trivial filter) of 'size' - // objects - - // retrieve information - uint32_t ValueOfPower() { return p_power; } - int NumOfDimensions() const { return no_dimensions; } // number of dimensions - int64_t NumOfTuples() const { // number of all tuples - if (!no_tuples_too_big) - return no_tuples; - throw common::OutOfMemoryException("Too many tuples. 
(85)"); - return 0; - } - int64_t NumOfTuples(DimensionVector &dimensions, - bool fail_on_overflow = true); // for a given subset of dimensions - - bool ZeroTuples() { return (!no_tuples_too_big && no_tuples == 0); } - bool TooManyTuples() { return no_tuples_too_big; } - Filter *GetFilter(int dim) const // Get the pointer to a filter attached to a dimension. - // NOTE: will be nullptr in case of materialized MultiIndex! - { - return no_dimensions > 0 ? group_for_dim[dim]->GetFilter(dim) : nullptr; - } - Filter *GetUpdatableFilter(int dim) const // Get the pointer to a filter, if it may be changed. - // NOTE: will be nullptr in case of materialized - // MultiIndex! - { - return no_dimensions > 0 ? group_for_dim[dim]->GetUpdatableFilter(dim) : nullptr; - } - bool NullsExist(int dim) { - return no_dimensions > 0 ? group_for_dim[dim]->NullsPossible(dim) : false; - } // return true if there exist any 0 value (always false for virtual - // dimensions) - bool MarkInvolvedDimGroups(DimensionVector &v); // if any dimension is marked, then mark the - // rest of this class. Return true if anything - // new marked. - bool IsOrderable(int dim) { return no_dimensions > 0 ? group_for_dim[dim]->IsOrderable() : true; } - uint64_t DimSize(int dim); // the size of one dimension: NumOfOnes for virtual, - // number of materialized tuples for materialized - uint64_t OrigSize(int dim) { return dim_size[dim]; } - // the maximal size of one dimension (e.g. the size of a table, the maximal - // index possible) - // Locking - - void LockForGetIndex(int dim); // must precede GetIndex(...) 
- void UnlockFromGetIndex(int dim); - void LockAllForUse(); - void UnlockAllFromUse(); - - bool IteratorLock() { // register a normal iterator; false: already locked - // for updating - if (iterator_lock > -1) - iterator_lock++; - return (iterator_lock > -1); - } - bool IteratorUpdatingLock() { // register an updating iterator; false: - // already locked - if (iterator_lock == 0) { - iterator_lock = -1; - return true; - } - return false; - } - void IteratorUnlock() { - if (iterator_lock > 0) - iterator_lock--; - else - iterator_lock = 0; - } - - // operations on the index - - void MIFilterAnd(MIIterator &mit, - Filter &fd); // limit the MultiIndex by excluding all tuples - // which are not present in fd, in order given - // by mit - - bool CanBeDistinct(int dim) const { - return can_be_distinct[dim]; - } // true if ( distinct(orig. column) => distinct( result ) ), false if we - // cannot guarantee this - bool IsForgotten(int dim) { - return group_for_dim[dim] ? !group_for_dim[dim]->DimEnabled(dim) : false; - } // true if the dimension is forgotten (not valid for getting value) - bool IsUsedInOutput(int dim) { return used_in_output[dim]; } // true if the dimension is used in output columns - void SetUsedInOutput(int dim) { used_in_output[dim] = true; } - void ResetUsedInOutput(int dim) { used_in_output[dim] = false; } - void Empty(int dim_to_make_empty = -1); // make an index empty (delete all - // tuples) with the same dimensions - // if parameter is set, then do not delete any virtual filter except this one - void UpdateNumOfTuples(); // recalculate the number of tuples - void MakeCountOnly(int64_t mat_tuples, DimensionVector &dims_to_materialize); - // recalculate the number of tuples, assuming mat_tuples is the new - // material_no_tuples and the dimensions from the list are deleted - - int MaxNumOfPacks(int dim); // maximal (upper approx.) 
number of different - // nonempty data packs for the given dimension - std::string Display(); // MultiIndex structure: f - Filter, i - IndexTable - - Transaction &ConnInfo() const { return *m_conn; } - - Transaction *m_conn; - - friend class MINewContents; - friend class MIIterator; - - friend class MultiIndexBuilder; - - private: - void AddDimension(); // declare a new dimension (internal) - void CheckIfVirtualCanBeDistinct(); // updates can_be_distinct table in case - // of virtual multiindex - std::vector ListInvolvedDimGroups(DimensionVector &v); // List all internal numbers of - // groups touched by the set of - // dimensions - - int no_dimensions; - int64_t *dim_size; // the size of a dimension - uint64_t no_tuples; // actual number of tuples (also in case of virtual - // index); should be updated in any change of index - uint32_t p_power; - bool no_tuples_too_big; // this flag is set if a virtual number of tuples - // exceeds 2^64 - std::vector can_be_distinct; // true if the dimension contain only one copy of - // original rows, false if we cannot guarantee this - std::vector used_in_output; // true if given dimension is used for - // generation of output columns - - // DimensionGroup stuff - void FillGroupForDim(); - std::vector dim_groups; // all active dimension groups - DimensionGroup **group_for_dim; // pointers to elements of dim_groups, for - // faster dimension identification - int *group_num_for_dim; // an element number of dim_groups, for faster - // dimension identification - - // Some technical functions - void MultiplyNoTuples(uint64_t factor); // the same as "no_tuples*=factor", but set - // no_tuples_too_big whenever needed - - int iterator_lock; // 0 - unlocked, >0 - normal iterator exists, -1 - - // updating iterator exists - bool shallow_dim_groups; // Indicates whether dim_groups is a shallow copy -}; -} // namespace core -} // namespace Tianmu - -#endif // TIANMU_CORE_MULTI_INDEX_H_ +/* Copyright (c) 2022 StoneAtom, Inc. 
All rights reserved. + Use is subject to license terms + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ +#ifndef TIANMU_CORE_MULTI_INDEX_H_ +#define TIANMU_CORE_MULTI_INDEX_H_ +#pragma once + +#include "core/dimension_group.h" +#include "executor/filter.h" +#include "index/index_table.h" +#include "optimizer/compile/cq_term.h" +#include "util/bin_tools.h" + +namespace Tianmu { +namespace core { +class MIIterator; + +class MultiIndex { + public: + MultiIndex(uint32_t power); // = PACK_DEFAUL + MultiIndex(const MultiIndex &s); + MultiIndex(MultiIndex &s, bool shallow); + ~MultiIndex(); + + void Clear(); // clear the multiindex completely (like destructor + Multiindex()) + + // Construction of the index + + // max_value - upper limit of indexes of newly added objects (e.g. number of + // all objects in a table to be joined) + void AddDimension_cross(uint64_t size); // calculate a cross product of the previous value of + // index and the full table (trivial filter) of 'size' objects + + // retrieve information + uint32_t ValueOfPower() { return p_power; } + int NumOfDimensions() const { return no_dimensions; } // number of dimensions + int64_t NumOfTuples() const { // number of all tuples + if (!no_tuples_too_big) + return no_tuples; + throw common::OutOfMemoryException("Too many tuples. 
(85)"); + return 0; + } + + int64_t NumOfTuples(DimensionVector &dimensions, + bool fail_on_overflow = true); // for a given subset of dimensions + + bool ZeroTuples() { return (!no_tuples_too_big && no_tuples == 0); } + bool TooManyTuples() { return no_tuples_too_big; } + + Filter *GetFilter(int dim) const { // Get the pointer to a filter attached to a dimension. + // NOTE: will be nullptr in case of materialized MultiIndex! + return no_dimensions > 0 ? group_for_dim[dim]->GetFilter(dim) : nullptr; + } + + Filter *GetUpdatableFilter(int dim) const { // Get the pointer to a filter, if it may be changed. + // NOTE: will be nullptr in case of materialized MultiIndex! + return no_dimensions > 0 ? group_for_dim[dim]->GetUpdatableFilter(dim) : nullptr; + } + + bool NullsExist(int dim) { + return no_dimensions > 0 ? group_for_dim[dim]->NullsPossible(dim) : false; + } // return true if there exist any 0 value (always false for virtual dimensions) + + bool MarkInvolvedDimGroups(DimensionVector &v); // if any dimension is marked, then mark the rest of this class. + // Return true if anything new marked. + bool IsOrderable(int dim) { return no_dimensions > 0 ? group_for_dim[dim]->IsOrderable() : true; } + + uint64_t DimSize(int dim); // the size of one dimension: NumOfOnes for virtual, + // number of materialized tuples for materialized + uint64_t OrigSize(int dim) { return dim_size[dim]; } + // the maximal size of one dimension (e.g. the size of a table, the maximal + // index possible) + // Locking + + void LockForGetIndex(int dim); // must precede GetIndex(...) 
+ void UnlockFromGetIndex(int dim); + void LockAllForUse(); + void UnlockAllFromUse(); + + bool IteratorLock() { // register a normal iterator; false: already locked for updating + if (iterator_lock > -1) + iterator_lock++; + return (iterator_lock > -1); + } + + bool IteratorUpdatingLock() { // register an updating iterator; false: already locked + if (iterator_lock == 0) { + iterator_lock = -1; + return true; + } + + return false; + } + + void IteratorUnlock() { (iterator_lock > 0) ? iterator_lock-- : iterator_lock = 0; } + + // operations on the index + void MIFilterAnd(MIIterator &mit, + Filter &fd); // limit the MultiIndex by excluding all tuples + // which are not present in fd, in order given + // by mit + + bool CanBeDistinct(int dim) const { + return can_be_distinct[dim]; + } // true if ( distinct(orig. column) => distinct( result ) ), false if we cannot guarantee this + + bool IsForgotten(int dim) { + return group_for_dim[dim] ? !group_for_dim[dim]->DimEnabled(dim) : false; + } // true if the dimension is forgotten (not valid for getting value) + bool IsUsedInOutput(int dim) { return used_in_output[dim]; } // true if the dimension is used in output columns + + void SetUsedInOutput(int dim) { used_in_output[dim] = true; } + void ResetUsedInOutput(int dim) { used_in_output[dim] = false; } + void Empty(int dim_to_make_empty = -1); // make an index empty (delete all tuples) with the same dimensions + // if parameter is set, then do not delete any virtual filter except this one + void UpdateNumOfTuples(); // recalculate the number of tuples + void MakeCountOnly(int64_t mat_tuples, DimensionVector &dims_to_materialize); + // recalculate the number of tuples, assuming mat_tuples is the new + // material_no_tuples and the dimensions from the list are deleted + + int MaxNumOfPacks(int dim); // maximal (upper approx.) 
number of different + // nonempty data packs for the given dimension + std::string Display(); // MultiIndex structure: f - Filter, i - IndexTable + + Transaction &ConnInfo() const { return *m_conn; } + + Transaction *m_conn; + friend class MINewContents; + friend class MIIterator; + friend class MultiIndexBuilder; + + private: + void AddDimension(); // declare a new dimension (internal) + void CheckIfVirtualCanBeDistinct(); // updates can_be_distinct table in case + // of virtual multiindex + std::vector ListInvolvedDimGroups(DimensionVector &v); // List all internal numbers of + // groups touched by the set of + // dimensions + + int no_dimensions; + int64_t *dim_size; // the size of a dimension + uint64_t no_tuples; // actual number of tuples (also in case of virtual + // index); should be updated in any change of index + uint32_t p_power; + bool no_tuples_too_big; // this flag is set if a virtual number of tuples exceeds 2^64 + + std::vector can_be_distinct; // true if the dimension contain only one copy of + // original rows, false if we cannot guarantee this + std::vector used_in_output; // true if given dimension is used for generation of output columns + + // DimensionGroup stuff + void FillGroupForDim(); + std::vector dim_groups; // all active dimension groups + DimensionGroup **group_for_dim; // pointers to elements of dim_groups, for + // faster dimension identification + int *group_num_for_dim; // an element number of dim_groups, for faster dimension identification + + // Some technical functions + void MultiplyNoTuples(uint64_t factor); // the same as "no_tuples*=factor", but set + // no_tuples_too_big whenever needed + + int iterator_lock; // 0 - unlocked, >0 - normal iterator exists, -1 - updating iterator exists + bool shallow_dim_groups; // Indicates whether dim_groups is a shallow copy +}; + +} // namespace core +} // namespace Tianmu + +#endif // TIANMU_CORE_MULTI_INDEX_H_ diff --git a/storage/tianmu/optimizer/compile/compiled_query.cpp 
b/storage/tianmu/optimizer/compile/compiled_query.cpp index 5912ce42d..890d974d4 100644 --- a/storage/tianmu/optimizer/compile/compiled_query.cpp +++ b/storage/tianmu/optimizer/compile/compiled_query.cpp @@ -437,16 +437,19 @@ void CompiledQuery::TableAlias(TabID &t_out, const TabID &n, const char *name, [ steps.push_back(s); } -void CompiledQuery::TmpTable(TabID &t_out, const TabID &t1, bool for_subq_in_where) { +void CompiledQuery::TmpTable(TabID &t_out, const TabID &t1, TableSubType subtype, bool for_subq_in_where) { CompiledQuery::CQStep s; if (for_subq_in_where) s.n1 = 1; else s.n1 = 0; + DEBUG_ASSERT(t1.n < 0 && NumOfTabs() > 0); s.type = StepType::TMP_TABLE; s.t1 = t_out = NextTabID(); // was s.t2!!! s.tables1.push_back(t1); + s.n2 = static_cast::type>(subtype); + steps_tmp_tables.push_back(s); steps.push_back(s); } diff --git a/storage/tianmu/optimizer/compile/compiled_query.h b/storage/tianmu/optimizer/compile/compiled_query.h index 63f36a43a..c19abb144 100644 --- a/storage/tianmu/optimizer/compile/compiled_query.h +++ b/storage/tianmu/optimizer/compile/compiled_query.h @@ -131,7 +131,7 @@ class CompiledQuery final { // Add a new step to the execution plan void TableAlias(TabID &t_out, const TabID &n, const char *tab_name = nullptr, int id = -1); - void TmpTable(TabID &t_out, const TabID &t1, bool for_subq = false); + void TmpTable(TabID &t_out, const TabID &t1, TableSubType subtype, bool for_subq = false); void CreateConds(CondID &c_out, const TabID &t1, CQTerm e1, common::Operator pr, CQTerm e2, CQTerm e3 = CQTerm(), bool is_or_subtree = false, char like_esc = '\\', bool can_cond_push = false); void CreateConds(CondID &c_out, const TabID &t1, const CondID &c1, bool is_or_subtree = false, diff --git a/storage/tianmu/sql/ha_my_tianmu.cpp b/storage/tianmu/sql/ha_my_tianmu.cpp index fb96e7d6f..89b99f0b9 100644 --- a/storage/tianmu/sql/ha_my_tianmu.cpp +++ b/storage/tianmu/sql/ha_my_tianmu.cpp @@ -70,15 +70,13 @@ bool ha_my_tianmu_set_statement_allowed(THD 
*thd, LEX *lex) { my_message(static_cast(common::ErrorCode::UNKNOWN_ERROR), "Queries inside SET statements are not supported. " "Enable the MySQL core::Query Path in my.cnf to execute the query " - "with reduced " - "performance.", + "with reduced performance.", MYF(0)); return false; } else push_warning(thd, Sql_condition::SL_NOTE, ER_UNKNOWN_ERROR, "SET statement not supported by the Tianmu Optimizer. The " - "query executed " - "by MySQL engine."); + "query executed by MySQL engine."); } return true; } @@ -97,8 +95,9 @@ QueryRouteTo ha_my_tianmu_query(THD *thd, LEX *lex, Query_result *&result_output if (handle_select_ret == QueryRouteTo::kToMySQL && AtLeastOneTianmuTableInvolved(lex) && ForbiddenMySQLQueryPath(lex)) { my_message(static_cast(common::ErrorCode::UNKNOWN_ERROR), - "The query includes syntax that is not supported by the storage engine. \ -Either restructure the query with supported syntax, or enable the MySQL core::Query Path in config file to execute the query with reduced performance.", + "The query includes syntax that is not supported by the storage engine. " + "Either restructure the query with supported syntax, or enable the MySQL core::Query Path in config " + "file to execute the query with reduced performance.", MYF(0)); handle_select_ret = QueryRouteTo::kToTianmu; } diff --git a/storage/tianmu/sql/ha_my_tianmu.h b/storage/tianmu/sql/ha_my_tianmu.h index 3ba01ecf4..211c9b74d 100644 --- a/storage/tianmu/sql/ha_my_tianmu.h +++ b/storage/tianmu/sql/ha_my_tianmu.h @@ -22,10 +22,7 @@ namespace Tianmu { namespace DBHandler { -enum class QueryRouteTo { - kToMySQL = 0, - kToTianmu, -}; +enum class QueryRouteTo { kToMySQL = 0, kToTianmu, kUnknown }; // processing the queries which routed to Tianmu engine. 
QueryRouteTo ha_my_tianmu_query(THD *thd, LEX *lex, Query_result *&result_output, ulong setup_tables_done_option, diff --git a/storage/tianmu/vc/column_bin_encoder.cpp b/storage/tianmu/vc/column_bin_encoder.cpp index f692615cd..4d38306d7 100644 --- a/storage/tianmu/vc/column_bin_encoder.cpp +++ b/storage/tianmu/vc/column_bin_encoder.cpp @@ -139,9 +139,9 @@ bool ColumnBinEncoder::PrepareEncoder(vcolumn::VirtualColumn *_vc, vcolumn::Virt // (above) my_encoder.reset(new ColumnBinEncoder::EncoderInt(vc, decodable, nulls_possible, descending)); } else { - DEBUG_ASSERT(!"wrong combination of encoded columns"); // Other types not - // implemented yet - my_encoder.reset(new ColumnBinEncoder::EncoderText(vc, decodable, nulls_possible, descending)); + // DEBUG_ASSERT(!"wrong combination of encoded columns"); // Other types not implemented yet + throw common::DatabaseException("wrong types of columns"); + // my_encoder.reset(new ColumnBinEncoder::EncoderText(vc, decodable, nulls_possible, descending)); } if (_vc2 != nullptr) { // multiple column encoding? bool encoding_possible = my_encoder->SecondColumn(_vc2); From 6a560a076a9a916056f132b9a32654002c854eba Mon Sep 17 00:00:00 2001 From: RingsC Date: Tue, 27 Jun 2023 17:10:32 +0800 Subject: [PATCH 09/10] fix(tianmu): To remove unnecessary optimization in tianmu 1:Removes the unnecessary optimization in the compilation stage of tianmu. It does not help us, and may introduce unexpected behaviors.
2:Refine MTR: issue848, issue1865, alter_table1, issue1523 --- mysql-test/suite/tianmu/r/alter_table1.result | 15 ++-- mysql-test/suite/tianmu/r/issue1861.result | 61 +++++++++++++++ mysql-test/suite/tianmu/r/issue1865.result | 1 + mysql-test/suite/tianmu/r/issue848.result | 23 +++--- mysql-test/suite/tianmu/t/alter_table1.test | 15 ++-- mysql-test/suite/tianmu/t/issue1523.test | 12 +-- mysql-test/suite/tianmu/t/issue1861.test | 78 +++++++++++++++++++ mysql-test/suite/tianmu/t/issue1865.test | 2 + mysql-test/suite/tianmu/t/issue848.test | 24 ++++-- storage/tianmu/core/query_compile.cpp | 7 -- 10 files changed, 196 insertions(+), 42 deletions(-) create mode 100644 mysql-test/suite/tianmu/r/issue1861.result create mode 100644 mysql-test/suite/tianmu/t/issue1861.test diff --git a/mysql-test/suite/tianmu/r/alter_table1.result b/mysql-test/suite/tianmu/r/alter_table1.result index 8694cec97..5d2b1a3b4 100644 --- a/mysql-test/suite/tianmu/r/alter_table1.result +++ b/mysql-test/suite/tianmu/r/alter_table1.result @@ -9,7 +9,7 @@ CREATE TABLE st1 name VARCHAR(25), deptId INT(11), salary FLOAT -); +) engine=tianmu; alter table st1 add column test varchar(4); alter table st1 add column test1 varchar(4) after test; alter table st1 add column test3 varchar(4); @@ -36,7 +36,7 @@ CREATE TABLE st2 name VARCHAR(25), deptId INT(11), salary FLOAT -); +) engine=tianmu; insert into st2 values(3,'haha1',45,4.5); insert into st2 values(1,'haha2',12,1.2); insert into st2 values(4,'haha3',31,3.2); @@ -68,7 +68,7 @@ name VARCHAR(25), deptId INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; ALTER TABLE st3 RENAME to st4 ; ALTER TABLE st4 RENAME as st3 ; ALTER TABLE st3 RENAME to st4 ; @@ -87,7 +87,7 @@ name VARCHAR(25), deptId INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; alter table st5 modify name varchar(20); alter table st5 modify column name varchar(24); alter table st5 modify column name varchar(15) after salary; @@ -98,7 +98,7 @@ name VARCHAR(25), deptId 
INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; alter table st6 rename to st7,add name1 varchar(10); desc st7; Field Type Null Key Default Extra @@ -138,4 +138,9 @@ salary float YES NULL name varchar(20) YES NULL name2 varchar(10) YES NULL name3 varchar(10) YES NULL +DROP TABLE st8; +DROP TABLE st5; +DROP TABLE st4; +DROP TABLE st2; +DROP TABLE st1; drop database alter_table1_test; diff --git a/mysql-test/suite/tianmu/r/issue1861.result b/mysql-test/suite/tianmu/r/issue1861.result new file mode 100644 index 000000000..eb9d57c40 --- /dev/null +++ b/mysql-test/suite/tianmu/r/issue1861.result @@ -0,0 +1,61 @@ +DROP DATABASE IF EXISTS issue1861_test; +CREATE DATABASE issue1861_test; +USE issue1861_test; +CREATE TABLE `c1fg_pool` ( +`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', +PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; +CREATE TABLE `c1fg_pl_node` ( +`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', +`POOL_ID` decimal(18,0) NOT NULL DEFAULT '-1', +`COMPANY_ID` decimal(18,0) DEFAULT '-1', +PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; +CREATE TABLE `c1fg_pl_account` ( +`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', +`NODE_ID` decimal(18,0) NOT NULL DEFAULT '-1', +PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; +CREATE TABLE `c1fg_pl_subsidiary` ( +`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', +`ACCOUNT_ID` decimal(18,0) NOT NULL DEFAULT '-1', +`FISCAL_DATE` date DEFAULT NULL, +`DR_AMOUNT` decimal(16,2) NOT NULL DEFAULT '0.00' , +PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; +CREATE TABLE `c1md_company` ( +`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1' , +`SYS_ID` decimal(18,0) NOT NULL DEFAULT '-1' , +PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; +SELECT B.company_id, +'上划日' ud_type, +2 sort_no, +'合计' fiscal_date, +DATE_FORMAT('2023-06-06', '%Y-%m-%d') fiscal_date, +C.pl_amount +FROM c1fg_pool A +INNER JOIN c1fg_pl_node B +ON A.row_id = B.pool_id +LEFT JOIN (SELECT 
c.node_id, SUM(d.dr_amount) pl_amount +FROM c1fg_pool a, +c1fg_pl_node b, +c1fg_pl_account c, +c1fg_pl_subsidiary d +WHERE a.row_id = b.pool_id +AND b.row_id = c.node_id +AND c.row_id = d.account_id +AND b.company_id IN +(SELECT t1.row_id +FROM c1md_company t1, c1md_company t2 +WHERE t1.sys_id = t2.sys_id) +GROUP BY c.node_id) C +ON B.row_id = C.node_id +ORDER BY B.company_id; +company_id ud_type sort_no fiscal_date fiscal_date pl_amount +DROP TABLE c1md_company; +DROP TABLE c1fg_pl_subsidiary; +DROP TABLE c1fg_pl_account; +DROP TABLE c1fg_pl_node; +DROP TABLE c1fg_pool; +DROP DATABASE issue1861_test; diff --git a/mysql-test/suite/tianmu/r/issue1865.result b/mysql-test/suite/tianmu/r/issue1865.result index 0db382140..9612b32ae 100644 --- a/mysql-test/suite/tianmu/r/issue1865.result +++ b/mysql-test/suite/tianmu/r/issue1865.result @@ -1,5 +1,6 @@ DROP DATABASE IF EXISTS issue1865_test_db; CREATE DATABASE issue1865_test_db; +USE issue1865_test_db; create table t1 (a int default 100, b int, c varchar(60))engine=tianmu; load data infile 'MYSQL_TEST_DIR/suite/tianmu/std_data/issue1865.dat' into table t1 (a, c); select * from t1; diff --git a/mysql-test/suite/tianmu/r/issue848.result b/mysql-test/suite/tianmu/r/issue848.result index e37ed1c6d..d5a931288 100644 --- a/mysql-test/suite/tianmu/r/issue848.result +++ b/mysql-test/suite/tianmu/r/issue848.result @@ -1,5 +1,6 @@ -create database test_issue848; -use test_issue848; +DROP DATABASE IF EXISTS issue848_test; +CREATE DATABASE issue848_test; +USE issue848_test; CREATE TABLE `t1` ( `c_char` char(10) DEFAULT NULL COMMENT 'char', `c_varchar` varchar(10) DEFAULT NULL COMMENT 'varchar', @@ -29,7 +30,7 @@ t1 CREATE TABLE `t1` ( `c_varchar` varchar(10) CHARACTER SET gbk DEFAULT NULL COMMENT 'varchar', `c_text` text CHARACTER SET gbk COMMENT 'text' ) ENGINE=TIANMU DEFAULT CHARSET=latin1 -alter table t1 add column ex_column char(30); +ALTER TABLE t1 ADD COLUMN ex_column char(30) AFTER c_text; show create table t1; Table Create 
Table t1 CREATE TABLE `t1` ( @@ -38,15 +39,15 @@ t1 CREATE TABLE `t1` ( `c_text` text CHARACTER SET gbk COMMENT 'text', `ex_column` char(30) DEFAULT NULL ) ENGINE=TIANMU DEFAULT CHARSET=latin1 -ALTER TABLE t1 MODIFY c_char char(10) CHARACTER SET UTF8MB4; -ALTER TABLE t1 MODIFY c_varchar char(10) CHARACTER SET UTF8MB4; -ALTER TABLE t1 MODIFY c_text char(10) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_char char(15) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_varchar char(15) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_text char(15) CHARACTER SET UTF8MB4; show create table t1; Table Create Table t1 CREATE TABLE `t1` ( - `c_char` char(10) CHARACTER SET utf8mb4 DEFAULT NULL, - `c_varchar` char(10) CHARACTER SET utf8mb4 DEFAULT NULL, - `c_text` char(10) CHARACTER SET utf8mb4 DEFAULT NULL, + `c_char` char(15) CHARACTER SET utf8mb4 DEFAULT NULL, + `c_varchar` char(15) CHARACTER SET utf8mb4 DEFAULT NULL, + `c_text` char(15) CHARACTER SET utf8mb4 DEFAULT NULL, `ex_column` char(30) DEFAULT NULL ) ENGINE=TIANMU DEFAULT CHARSET=latin1 select hex(c_char),hex(c_varchar),hex(c_text) from t1; @@ -66,5 +67,5 @@ t1 CREATE TABLE `t1` ( `c_text` char(10) CHARACTER SET gbk DEFAULT NULL, `ex_column` char(30) DEFAULT NULL ) ENGINE=TIANMU DEFAULT CHARSET=latin1 -drop table t1; -drop database test_issue848; +DROP TABLE t1; +DROP DATABASE issue848_test; diff --git a/mysql-test/suite/tianmu/t/alter_table1.test b/mysql-test/suite/tianmu/t/alter_table1.test index d5bea703f..17bfd8f29 100644 --- a/mysql-test/suite/tianmu/t/alter_table1.test +++ b/mysql-test/suite/tianmu/t/alter_table1.test @@ -18,7 +18,7 @@ CREATE TABLE st1 name VARCHAR(25), deptId INT(11), salary FLOAT -); +) engine=tianmu; alter table st1 add column test varchar(4); alter table st1 add column test1 varchar(4) after test; @@ -48,7 +48,7 @@ CREATE TABLE st2 name VARCHAR(25), deptId INT(11), salary FLOAT -); +) engine=tianmu; insert into st2 values(3,'haha1',45,4.5); insert into st2 values(1,'haha2',12,1.2); @@ -72,7 
+72,7 @@ name VARCHAR(25), deptId INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; ALTER TABLE st3 RENAME to st4 ; ALTER TABLE st4 RENAME as st3 ; @@ -95,7 +95,7 @@ name VARCHAR(25), deptId INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; alter table st5 modify name varchar(20); alter table st5 modify column name varchar(24); @@ -112,7 +112,7 @@ name VARCHAR(25), deptId INT(11), salary FLOAT, PRIMARY KEY(id) -); +) engine=tianmu; alter table st6 rename to st7,add name1 varchar(10); desc st7; @@ -130,4 +130,9 @@ alter table st8 add name3 varchar(10),modify name varchar(20); desc st8; # clean up +DROP TABLE st8; +DROP TABLE st5; +DROP TABLE st4; +DROP TABLE st2; +DROP TABLE st1; drop database alter_table1_test; diff --git a/mysql-test/suite/tianmu/t/issue1523.test b/mysql-test/suite/tianmu/t/issue1523.test index ce0f8fcc7..6762bb0be 100644 --- a/mysql-test/suite/tianmu/t/issue1523.test +++ b/mysql-test/suite/tianmu/t/issue1523.test @@ -66,7 +66,7 @@ select count(*) from t1; --source include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=1; select count(*) from t1; @@ -85,7 +85,7 @@ select count(*) from t1; --source include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=1; select count(*) from t1; @@ -101,7 +101,7 @@ select count(*) from t1; --source include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=9; select count(*) from t1; @@ -114,7 +114,7 @@ select count(*) from t1; --source include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=1; select count(*) from t1; @@ -128,7 +128,7 @@ select count(*) from t1; --source include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=10; select count(*) from t1; @@ -143,7 +143,7 @@ select count(*) from t1; --source 
include/sync_slave_sql_with_master.inc --echo # on slave: -sleep 1; +sleep 5; select last_car_no from t1 where id=11; select count(*) from t1; diff --git a/mysql-test/suite/tianmu/t/issue1861.test b/mysql-test/suite/tianmu/t/issue1861.test new file mode 100644 index 000000000..02ad68d0d --- /dev/null +++ b/mysql-test/suite/tianmu/t/issue1861.test @@ -0,0 +1,78 @@ +--source include/have_tianmu.inc + +--disable_warnings +DROP DATABASE IF EXISTS issue1861_test; + +CREATE DATABASE issue1861_test; +USE issue1861_test; +--enable_warnings + +CREATE TABLE `c1fg_pool` ( + `ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', + PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; + +CREATE TABLE `c1fg_pl_node` ( + `ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', + `POOL_ID` decimal(18,0) NOT NULL DEFAULT '-1', + `COMPANY_ID` decimal(18,0) DEFAULT '-1', + PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; + + +CREATE TABLE `c1fg_pl_account` ( + `ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', + `NODE_ID` decimal(18,0) NOT NULL DEFAULT '-1', + PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; + +CREATE TABLE `c1fg_pl_subsidiary` ( + `ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1', + `ACCOUNT_ID` decimal(18,0) NOT NULL DEFAULT '-1', + `FISCAL_DATE` date DEFAULT NULL, + `DR_AMOUNT` decimal(16,2) NOT NULL DEFAULT '0.00' , + PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; + + +CREATE TABLE `c1md_company` ( + `ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1' , + `SYS_ID` decimal(18,0) NOT NULL DEFAULT '-1' , + PRIMARY KEY (`ROW_ID`) +) ENGINE=TIANMU DEFAULT CHARSET=utf8mb4; + +#--------------------------------------------------- +SELECT B.company_id, + '上划日' ud_type, + 2 sort_no, + '合计' fiscal_date, + DATE_FORMAT('2023-06-06', '%Y-%m-%d') fiscal_date, + C.pl_amount + FROM c1fg_pool A + INNER JOIN c1fg_pl_node B + ON A.row_id = B.pool_id + LEFT JOIN (SELECT c.node_id, SUM(d.dr_amount) pl_amount + FROM c1fg_pool a, + c1fg_pl_node b, + 
c1fg_pl_account c, + c1fg_pl_subsidiary d + WHERE a.row_id = b.pool_id + AND b.row_id = c.node_id + AND c.row_id = d.account_id + AND b.company_id IN + (SELECT t1.row_id + FROM c1md_company t1, c1md_company t2 + WHERE t1.sys_id = t2.sys_id) + GROUP BY c.node_id) C +ON B.row_id = C.node_id +ORDER BY B.company_id; + + + +DROP TABLE c1md_company; +DROP TABLE c1fg_pl_subsidiary; +DROP TABLE c1fg_pl_account; +DROP TABLE c1fg_pl_node; +DROP TABLE c1fg_pool; + +DROP DATABASE issue1861_test; diff --git a/mysql-test/suite/tianmu/t/issue1865.test b/mysql-test/suite/tianmu/t/issue1865.test index b832ac6e1..e308fae22 100644 --- a/mysql-test/suite/tianmu/t/issue1865.test +++ b/mysql-test/suite/tianmu/t/issue1865.test @@ -6,6 +6,8 @@ DROP DATABASE IF EXISTS issue1865_test_db; CREATE DATABASE issue1865_test_db; +USE issue1865_test_db; + create table t1 (a int default 100, b int, c varchar(60))engine=tianmu; --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR diff --git a/mysql-test/suite/tianmu/t/issue848.test b/mysql-test/suite/tianmu/t/issue848.test index 1fb1f3435..0ce3ff1e9 100644 --- a/mysql-test/suite/tianmu/t/issue848.test +++ b/mysql-test/suite/tianmu/t/issue848.test @@ -1,26 +1,34 @@ --source include/have_tianmu.inc -create database test_issue848; -use test_issue848; + +--disable_warnings +DROP DATABASE IF EXISTS issue848_test; +--enable_warnings + +CREATE DATABASE issue848_test; +USE issue848_test; + CREATE TABLE `t1` ( `c_char` char(10) DEFAULT NULL COMMENT 'char', `c_varchar` varchar(10) DEFAULT NULL COMMENT 'varchar', `c_text` text COMMENT 'text') ENGINE=TIANMU DEFAULT CHARSET=GBK; + show create table t1; insert into t1(c_char,c_varchar,c_text) values(x'D6D0B9FA',x'D5E3BDAD',x'BABCD6DD'); + ALTER TABLE t1 DEFAULT CHARACTER SET gbk; show create table t1; ALTER TABLE t1 CHARACTER SET latin1; show create table t1; -alter table t1 add column ex_column char(30); +ALTER TABLE t1 ADD COLUMN ex_column char(30) AFTER c_text; show create table t1; -ALTER TABLE t1 MODIFY c_char 
char(10) CHARACTER SET UTF8MB4; -ALTER TABLE t1 MODIFY c_varchar char(10) CHARACTER SET UTF8MB4; -ALTER TABLE t1 MODIFY c_text char(10) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_char char(15) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_varchar char(15) CHARACTER SET UTF8MB4; +ALTER TABLE t1 MODIFY c_text char(15) CHARACTER SET UTF8MB4; show create table t1; select hex(c_char),hex(c_varchar),hex(c_text) from t1; @@ -30,5 +38,5 @@ ALTER TABLE t1 CHANGE c_text c_text char(10) CHARACTER SET GBK; select hex(c_char),hex(c_varchar),hex(c_text) from t1; show create table t1; -drop table t1; -drop database test_issue848; +DROP TABLE t1; +DROP DATABASE issue848_test; diff --git a/storage/tianmu/core/query_compile.cpp b/storage/tianmu/core/query_compile.cpp index 0ce07e4e2..d4ef73525 100644 --- a/storage/tianmu/core/query_compile.cpp +++ b/storage/tianmu/core/query_compile.cpp @@ -1145,13 +1145,6 @@ QueryRouteTo Query::Compile(CompiledQuery *compiled_query, SELECT_LEX *selects_l } } - // partial optimization of LOJ conditions, JOIN::optimize(part=3) necessary due to already done basic transformation - // of conditions see comments in sql_select.cc:JOIN::optimize() - if (IsLOJ(join_list) && - ((!sl->join->where_cond) || (sl->join->where_cond && (uint64_t)sl->join->where_cond != 0x01))) { - sl->join->optimize(OptimizePhase::Finish_LOJ_Transform); - } - Item *field_for_subselect; Item *cond_to_reinsert = nullptr; List *list_to_reinsert = nullptr; From 60a8cf71f3632cba2bd28ab1f6f94101abece140 Mon Sep 17 00:00:00 2001 From: RingsC Date: Fri, 9 Jun 2023 10:48:49 +0800 Subject: [PATCH 10/10] fix(tianmu): hotfix corruption in ValueOrNull under multi-thread In multi-threaded aggregation, a double free can occur in ExpressionColumn because it is accessed without protection. Thread A runs ValueOrNull::operator == while thread B tries to free the same value. Therefore, it leads to an instance crash.
--- mysql-test/suite/tianmu/r/issue1855.result | 14 ++++++ mysql-test/suite/tianmu/t/issue1855.test | 50 ++++++++++++++++++- storage/tianmu/core/value_or_null.cpp | 13 +++-- storage/tianmu/core/value_or_null.h | 1 + storage/tianmu/handler/ha_tianmu.cpp | 2 +- .../optimizer/aggregation_algorithm.cpp | 1 + storage/tianmu/optimizer/aggregator_basic.cpp | 14 ++++-- storage/tianmu/optimizer/aggregator_basic.h | 2 + storage/tianmu/optimizer/group_table.cpp | 2 + storage/tianmu/vc/column_share.h | 1 + storage/tianmu/vc/expr_column.cpp | 5 ++ 11 files changed, 93 insertions(+), 12 deletions(-) diff --git a/mysql-test/suite/tianmu/r/issue1855.result b/mysql-test/suite/tianmu/r/issue1855.result index fa4d86f7f..1270e52cc 100644 --- a/mysql-test/suite/tianmu/r/issue1855.result +++ b/mysql-test/suite/tianmu/r/issue1855.result @@ -10,3 +10,17 @@ SUM(LENGTH(p_val)) SELECT SUM(LENGTH(p_val)/2) FROM ttt; SUM(LENGTH(p_val)/2) 3.0000 +Variable_name Value +tianmu_groupby_parallel_degree 0 +tianmu_groupby_parallel_rows_minimum 655360 +Variable_name Value +tianmu_groupby_parallel_degree 4 +tianmu_groupby_parallel_rows_minimum 100 +SUM(LENGTH(p_id)) +158949 +SUM(LENGTH(p_id)/2) +79474.5000 +SUM(LENGTH(p_val)) +38901 +SUM(LENGTH(p_val)/2) +19450.5000 diff --git a/mysql-test/suite/tianmu/t/issue1855.test b/mysql-test/suite/tianmu/t/issue1855.test index 99b6c1a72..914e401d2 100644 --- a/mysql-test/suite/tianmu/t/issue1855.test +++ b/mysql-test/suite/tianmu/t/issue1855.test @@ -32,8 +32,56 @@ SELECT SUM(LENGTH(p_val)) FROM ttt; SELECT SUM(LENGTH(p_val)/2) FROM ttt; --disable_query_log -DROP TABLE ttt; +--DELIMITER // +CREATE PROCEDURE insert_data() +wholeblock:BEGIN + DECLARE x INT; + DECLARE str VARCHAR(50); + DECLARE str1 VARCHAR(50); + SET x = 0; + SET str = 'this is '; + SET str1 = ''; + + loop_label: LOOP + IF x > 10000 THEN + LEAVE loop_label; + END IF; + + SET str1 = substring(md5(rand()), 1, 10); + SET str = CONCAT(str,str1,','); + SET str = CONCAT(str,x,','); + + INSERT INTO ttt 
VALUES (str, x); + SET str =''; + SET x = x + 1; + ITERATE loop_label; + END LOOP; +END// + +--DELIMITER ; + +CALL insert_data(); + +DROP PROCEDURE insert_data; + +SHOW VARIABLES LIKE "%tianmu_groupby_parallel%"; + +SET GLOBAL tianmu_groupby_parallel_rows_minimum = 100; +SET GLOBAL tianmu_groupby_parallel_degree = 4; + +SHOW VARIABLES LIKE "%tianmu_groupby_parallel%"; + +SELECT SUM(LENGTH(p_id)) FROM ttt; + +SELECT SUM(LENGTH(p_id)/2) FROM ttt; + +SELECT SUM(LENGTH(p_val)) FROM ttt; + +SELECT SUM(LENGTH(p_val)/2) FROM ttt; + + +DROP TABLE ttt; DROP DATABASE issue1855_test_db; --enable_query_log diff --git a/storage/tianmu/core/value_or_null.cpp b/storage/tianmu/core/value_or_null.cpp index 2c44df2a5..d284fefa1 100644 --- a/storage/tianmu/core/value_or_null.cpp +++ b/storage/tianmu/core/value_or_null.cpp @@ -23,6 +23,7 @@ namespace Tianmu { namespace core { + void ValueOrNull::SetBString(const types::BString &tianmu_s) { Clear(); if (!tianmu_s.IsNull()) { @@ -44,7 +45,7 @@ void ValueOrNull::SetBString(const types::BString &tianmu_s) { } void ValueOrNull::MakeStringOwner() { - if (!sp || string_owner) + if (!sp || !len || string_owner) return; char *tmp = new (std::nothrow) char[len + 1]; @@ -70,17 +71,14 @@ void ValueOrNull::GetBString(types::BString &tianmu_s) const { tianmu_s = rcs_null; } else { // copy either from sp or x - if (sp) - tianmu_s = types::BString(sp, len, true); - else - tianmu_s = types::TianmuNum(x).ToBString(); + tianmu_s = (sp) ? 
types::BString(sp, len, true) : types::TianmuNum(x).ToBString(); tianmu_s.MakePersistent(); } } ValueOrNull::ValueOrNull(ValueOrNull const &von) : x(von.x), len(von.len), string_owner(von.string_owner), null(von.null) { - if (string_owner) { + if (string_owner && von.sp && len > 0) { sp = new (std::nothrow) char[len + 1]; if (sp) { @@ -99,7 +97,7 @@ ValueOrNull &ValueOrNull::operator=(ValueOrNull const &von) { if (this == &von) return *this; - if (von.string_owner) { + if (von.string_owner && von.sp) { sp = new (std::nothrow) char[von.len + 1]; if (sp) { std::memset(sp, '\0', von.len + 1); @@ -140,5 +138,6 @@ void ValueOrNull::Swap(ValueOrNull &von) { std::swap(string_owner, von.string_owner); } } + } // namespace core } // namespace Tianmu diff --git a/storage/tianmu/core/value_or_null.h b/storage/tianmu/core/value_or_null.h index f07f71ddc..3bef51b8e 100644 --- a/storage/tianmu/core/value_or_null.h +++ b/storage/tianmu/core/value_or_null.h @@ -102,6 +102,7 @@ class ValueOrNull final { string_owner = false; null = true; + len = 0; } } diff --git a/storage/tianmu/handler/ha_tianmu.cpp b/storage/tianmu/handler/ha_tianmu.cpp index 02ad61646..f720432bb 100644 --- a/storage/tianmu/handler/ha_tianmu.cpp +++ b/storage/tianmu/handler/ha_tianmu.cpp @@ -2623,7 +2623,7 @@ static MYSQL_SYSVAR_BOOL(groupby_speedup, tianmu_sysvar_groupby_speedup, PLUGIN_ static MYSQL_SYSVAR_UINT(groupby_parallel_degree, tianmu_sysvar_groupby_parallel_degree, PLUGIN_VAR_INT, "group by parallel degree, number of worker threads", nullptr, nullptr, 8, 0, INT32_MAX, 0); static MYSQL_SYSVAR_ULONGLONG(groupby_parallel_rows_minimum, tianmu_sysvar_groupby_parallel_rows_minimum, - PLUGIN_VAR_LONGLONG, "group by parallel minimum rows", nullptr, nullptr, 655360, 655360, + PLUGIN_VAR_LONGLONG, "group by parallel minimum rows", nullptr, nullptr, 655360, 100, INT64_MAX, 0); static MYSQL_SYSVAR_UINT(slow_query_record_interval, tianmu_sysvar_slow_query_record_interval, PLUGIN_VAR_INT, "slow Query Threshold of 
recording tianmu logs, in seconds", nullptr, nullptr, 0, 0, INT32_MAX, diff --git a/storage/tianmu/optimizer/aggregation_algorithm.cpp b/storage/tianmu/optimizer/aggregation_algorithm.cpp index 54161928a..a80aa3374 100644 --- a/storage/tianmu/optimizer/aggregation_algorithm.cpp +++ b/storage/tianmu/optimizer/aggregation_algorithm.cpp @@ -1072,6 +1072,7 @@ void AggregationWorkerEnt::DistributeAggreTaskAverage(MIIterator &mit, uint64_t for (uint i = 0; i < vTask.size(); ++i) { if (dims.NoDimsUsed() == 0) dims.SetAll(); + auto &mii = taskIterator.emplace_back(mit, true); mii.SetTaskNum(vTask.size()); mii.SetTaskId(i); diff --git a/storage/tianmu/optimizer/aggregator_basic.cpp b/storage/tianmu/optimizer/aggregator_basic.cpp index e1747647b..6016262a6 100644 --- a/storage/tianmu/optimizer/aggregator_basic.cpp +++ b/storage/tianmu/optimizer/aggregator_basic.cpp @@ -24,15 +24,23 @@ namespace Tianmu { namespace core { void AggregatorSum64::PutAggregatedValue(unsigned char *buf, int64_t v, int64_t factor) { + std::scoped_lock scp_lk(aggr_mtx); + stats_updated = false; int64_t *p = (int64_t *)buf; if (*p == common::NULL_VALUE_64) { *p = 0; } - double overflow_check = double(*p) + double(v) * factor; + + long double overflow_check = double(*p) + double(v) * factor; if (overflow_check > std::numeric_limits::max() || - overflow_check < std::numeric_limits::min()) - throw common::NotImplementedException("Aggregation overflow."); + overflow_check < std::numeric_limits::min()) { + char str_buff[1024] = {'\0'}; + sprintf(str_buff, "Aggregation overflow for long double. 
over_check: %Lf", overflow_check); + + throw common::InternalException(str_buff); + } + *p += v * factor; } diff --git a/storage/tianmu/optimizer/aggregator_basic.h b/storage/tianmu/optimizer/aggregator_basic.h index 88275ee27..1e5c5d80d 100644 --- a/storage/tianmu/optimizer/aggregator_basic.h +++ b/storage/tianmu/optimizer/aggregator_basic.h @@ -18,6 +18,7 @@ #define TIANMU_CORE_AGGREGATOR_BASIC_H_ #pragma once +#include #include "optimizer/aggregator.h" namespace Tianmu { @@ -87,6 +88,7 @@ class AggregatorSum64 : public TIANMUAggregator { } private: + std::mutex aggr_mtx; int64_t pack_sum; int64_t pack_min; // min and max are used to check whether a pack may update // sum (i.e. both 0 means "no change") diff --git a/storage/tianmu/optimizer/group_table.cpp b/storage/tianmu/optimizer/group_table.cpp index 2c1dcfd4a..3b0d32ce0 100644 --- a/storage/tianmu/optimizer/group_table.cpp +++ b/storage/tianmu/optimizer/group_table.cpp @@ -569,9 +569,11 @@ bool GroupTable::PutAggregatedValue(int col, int64_t row, MIIterator &mit, int64 DEBUG_ASSERT(gdistinct[col]); if (vc[col]->IsNull(mit)) return true; // omit nulls + GDTResult res = gdistinct[col]->Add(row, mit); if (res == GDTResult::GDT_EXISTS) return true; // value found, do not aggregate it again + if (res == GDTResult::GDT_FULL) { // if (gdistinct[col]->AlreadyFull()) // not_full = false; // disable also the main grouping table (if it is a diff --git a/storage/tianmu/vc/column_share.h b/storage/tianmu/vc/column_share.h index 38ddd1115..e1dc6ea01 100644 --- a/storage/tianmu/vc/column_share.h +++ b/storage/tianmu/vc/column_share.h @@ -140,6 +140,7 @@ class ColumnShare final { bool has_filter_hist = false; bool has_filter_bloom = false; }; + } // namespace core } // namespace Tianmu diff --git a/storage/tianmu/vc/expr_column.cpp b/storage/tianmu/vc/expr_column.cpp index 22184e71c..013958049 100644 --- a/storage/tianmu/vc/expr_column.cpp +++ b/storage/tianmu/vc/expr_column.cpp @@ -16,6 +16,7 @@ */ #include "expr_column.h" 
+#include #include "core/mysql_expression.h" #include "optimizer/compile/compiled_query.h" #include "vc/tianmu_attr.h" @@ -131,6 +132,9 @@ bool ExpressionColumn::FeedArguments(const core::MIIterator &mit) { } int64_t ExpressionColumn::GetValueInt64Impl(const core::MIIterator &mit) { + static std::mutex scp_mutex; + std::scoped_lock lock(scp_mutex); + if (FeedArguments(mit)) last_val_ = expr_->Evaluate(); @@ -152,6 +156,7 @@ int64_t ExpressionColumn::GetValueInt64Impl(const core::MIIterator &mit) { val_it.second->Clear_SP(); } } + return last_val_->Get64(); }