lightning: support prepared statement and client stmt cache in logical import mode #55482
@@ -2,3 +2,4 @@
 backend = "tidb"
 on-duplicate = "error"
 logical-import-batch-rows = 1
+logical-import-prep-stmt = true
@@ -2,3 +2,4 @@
 backend = "tidb"
 on-duplicate = "ignore"
 logical-import-batch-rows = 1
+logical-import-prep-stmt = true
@@ -2,3 +2,4 @@
 backend = "tidb"
 on-duplicate = "replace"
 logical-import-batch-rows = 1
+logical-import-prep-stmt = true
@@ -21,6 +21,7 @@ import (
     "fmt"
     "strconv"
     "strings"
+    "sync"
     "time"

     gmysql "github.com/go-sql-driver/mysql"
@@ -43,6 +44,8 @@ import (
     "github.com/pingcap/tidb/pkg/table"
     "github.com/pingcap/tidb/pkg/types"
     "github.com/pingcap/tidb/pkg/util/dbutil"
+    "github.com/pingcap/tidb/pkg/util/hack"
+    "github.com/pingcap/tidb/pkg/util/kvcache"
     "github.com/pingcap/tidb/pkg/util/redact"
     "go.uber.org/zap"
     "go.uber.org/zap/zapcore"
@@ -56,12 +59,16 @@ var extraHandleTableColumn = &table.Column{

 const (
     writeRowsMaxRetryTimes = 3
+    // To limit memory usage for prepared statements.
+    prepStmtCacheSize uint = 100
 )

 type tidbRow struct {
-    insertStmt string
-    path string
-    offset int64
+    insertStmt string
+    preparedInsertStmt string
+    values []any
+    path string
+    offset int64
 }

 var emptyTiDBRow = tidbRow{
@@ -91,8 +98,9 @@ type tidbEncoder struct {
     // the there are enough columns.
     columnCnt int
     // data file path
-    path string
-    logger log.Logger
+    path string
+    logger log.Logger
+    prepStmt bool
 }

 type encodingBuilder struct{}
@@ -106,10 +114,11 @@ func NewEncodingBuilder() encode.EncodingBuilder {
 // It implements the `backend.EncodingBuilder` interface.
 func (*encodingBuilder) NewEncoder(_ context.Context, config *encode.EncodingConfig) (encode.Encoder, error) {
     return &tidbEncoder{
-        mode: config.SQLMode,
-        tbl: config.Table,
-        path: config.Path,
-        logger: config.Logger,
+        mode: config.SQLMode,
+        tbl: config.Table,
+        path: config.Path,
+        logger: config.Logger,
+        prepStmt: config.LogicalImportPrepStmt,
     }, nil
 }
@@ -288,6 +297,16 @@ func (*targetInfoGetter) CheckRequirements(ctx context.Context, _ *backend.Check
     return nil
 }

+// stmtKey defines key for stmtCache.
+type stmtKey struct {
+    query string
+}
+
+// Hash implements SimpleLRUCache.Key.
+func (k *stmtKey) Hash() []byte {
+    return hack.Slice(k.query)
+}
+
 type tidbBackend struct {
     db *sql.DB
     conflictCfg config.Conflict
@@ -301,6 +320,9 @@ type tidbBackend struct {
     // affecting the cluster too much.
     maxChunkSize uint64
     maxChunkRows int
+    // implement stmtCache to improve performance
+    stmtCache *kvcache.SimpleLRUCache
+    stmtCacheMutex sync.RWMutex
 }

 var _ backend.Backend = (*tidbBackend)(nil)

Review thread on the new stmtCache field:
- as long as the table is importing, the prepared stmt is needed, no need for such an LRU
- the reason to use an LRU here is that the batch-insert prepared statements may not all be the same, since the insert/replace statement is constrained by …
- a batch size that depends on row size is unstable for prepared statements; in the worst case every prepared stmt is different. We can ignore batch-size in this case and only honor batch-rows, cc @lance6716
- the default …
- not the author of #46607, but we have a customer whose database is unable to handle 96 KB transactions, which requires the limit to be lowered. The …
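To make the concern above concrete, here is a small sketch (hypothetical table and column names, not code from this PR) of why the prepared-statement text varies with batch size: the placeholder SQL contains one `(?,...)` group per row, so batches cut at different row counts yield different statement strings, which is why a bounded cache is used instead of a single statement.

```go
package main

import (
	"fmt"
	"strings"
)

// batchSQL builds the placeholder form of a multi-row INSERT for n rows of
// two columns. The text depends on n, so two batches of different sizes
// cannot share one prepared statement.
func batchSQL(n int) string {
	groups := make([]string, n)
	for i := range groups {
		groups[i] = "(?,?)"
	}
	return "INSERT INTO `db`.`t` (c1,c2) VALUES " + strings.Join(groups, ",")
}

func main() {
	fmt.Println(batchSQL(2)) // INSERT INTO `db`.`t` (c1,c2) VALUES (?,?),(?,?)
	fmt.Println(batchSQL(3)) // INSERT INTO `db`.`t` (c1,c2) VALUES (?,?),(?,?),(?,?)
}
```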
@@ -334,13 +356,23 @@ func NewTiDBBackend(
         log.FromContext(ctx).Warn("unsupported conflict strategy for TiDB backend, overwrite with `error`")
         onDuplicate = config.ErrorOnDup
     }
+    var stmtCache *kvcache.SimpleLRUCache
+    if cfg.TikvImporter.LogicalImportPrepStmt {
+        stmtCache = kvcache.NewSimpleLRUCache(prepStmtCacheSize, 0, 0)
+        stmtCache.SetOnEvict(func(_ kvcache.Key, value kvcache.Value) {
+            stmt := value.(*sql.Stmt)
+            stmt.Close()
+        })
+    }
     return &tidbBackend{
-        db: db,
-        conflictCfg: conflict,
-        onDuplicate: onDuplicate,
-        errorMgr: errorMgr,
-        maxChunkSize: uint64(cfg.TikvImporter.LogicalImportBatchSize),
-        maxChunkRows: cfg.TikvImporter.LogicalImportBatchRows,
+        db: db,
+        conflictCfg: conflict,
+        onDuplicate: onDuplicate,
+        errorMgr: errorMgr,
+        maxChunkSize: uint64(cfg.TikvImporter.LogicalImportBatchSize),
+        maxChunkRows: cfg.TikvImporter.LogicalImportBatchRows,
+        stmtCache: stmtCache,
+        stmtCacheMutex: sync.RWMutex{},
     }
 }
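To illustrate the eviction hook wired up above, here is a minimal, self-contained sketch of a close-on-evict statement cache. It is only a stand-in for kvcache.SimpleLRUCache (names such as stmtLRU are made up), showing the idea that an evicted *sql.Stmt must be closed so server-side prepared statements are not leaked.

```go
package sketch

import (
	"container/list"
	"database/sql"
)

// entry pairs a statement text with its prepared statement.
type entry struct {
	query string
	stmt  *sql.Stmt
}

// stmtLRU is a tiny LRU keyed by statement text; statements that fall out
// of the cache are closed so they are also released on the server side.
type stmtLRU struct {
	capacity int
	order    *list.List               // front = most recently used
	items    map[string]*list.Element // query -> element holding *entry
}

func newStmtLRU(capacity int) *stmtLRU {
	return &stmtLRU{capacity: capacity, order: list.New(), items: make(map[string]*list.Element)}
}

func (c *stmtLRU) get(query string) (*sql.Stmt, bool) {
	if el, ok := c.items[query]; ok {
		c.order.MoveToFront(el)
		return el.Value.(*entry).stmt, true
	}
	return nil, false
}

func (c *stmtLRU) put(query string, stmt *sql.Stmt) {
	if _, ok := c.items[query]; ok {
		return // already cached; the caller should close its duplicate
	}
	c.items[query] = c.order.PushFront(&entry{query: query, stmt: stmt})
	if c.order.Len() > c.capacity {
		oldest := c.order.Back()
		c.order.Remove(oldest)
		evicted := oldest.Value.(*entry)
		delete(c.items, evicted.query)
		evicted.stmt.Close() // the on-evict hook, inlined here
	}
}
```

The PR gets the same effect from kvcache.SimpleLRUCache plus SetOnEvict; the sketch only shows the shape of the idea.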
@@ -556,16 +588,25 @@ func (enc *tidbEncoder) Encode(row []types.Datum, _ int64, columnPermutation []i
         return emptyTiDBRow, errors.Errorf("column count mismatch, at most %d but got %d", len(enc.columnIdx), len(row))
     }

-    var encoded strings.Builder
+    var encoded, preparedInsertStmt strings.Builder
+    var values []any
     encoded.Grow(8 * len(row))
     encoded.WriteByte('(')
+    if enc.prepStmt {
+        preparedInsertStmt.Grow(2 * len(row))
+        preparedInsertStmt.WriteByte('(')
+        values = make([]any, 0, len(row))
+    }
     cnt := 0
     for i, field := range row {
         if enc.columnIdx[i] < 0 {
             continue
         }
         if cnt > 0 {
             encoded.WriteByte(',')
+            if enc.prepStmt {
+                preparedInsertStmt.WriteByte(',')
+            }
         }
         datum := field
         if err := enc.appendSQL(&encoded, &datum, getColumnByIndex(cols, enc.columnIdx[i])); err != nil {
@@ -576,13 +617,23 @@ func (enc *tidbEncoder) Encode(row []types.Datum, _ int64, columnPermutation []i
             )
             return nil, err
         }
+        if enc.prepStmt {
+            preparedInsertStmt.WriteByte('?')
+            values = append(values, datum.GetValue())
+        }
         cnt++
     }
     encoded.WriteByte(')')
+    if enc.prepStmt {
+        preparedInsertStmt.WriteByte(')')
+    }

     return tidbRow{
-        insertStmt: encoded.String(),
-        path: enc.path,
-        offset: offset,
+        insertStmt: encoded.String(),
+        preparedInsertStmt: preparedInsertStmt.String(),
+        values: values,
+        path: enc.path,
+        offset: offset,
     }, nil
 }
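For a concrete picture of what Encode now produces, here is a minimal sketch (simplified: no column permutation, type handling, or SQL-mode escaping, and all names are made up) that renders one row both as a literal tuple and as a placeholder tuple with its argument slice.

```go
package main

import (
	"fmt"
	"strings"
)

// encodeRow mimics the idea in tidbEncoder.Encode: build the literal form
// ('1','a') and, when prepStmt is enabled, the placeholder form (?,?) with
// the raw values collected separately for a prepared statement.
func encodeRow(row []any, prepStmt bool) (literal, placeholders string, args []any) {
	var lit, ph strings.Builder
	lit.WriteByte('(')
	if prepStmt {
		ph.WriteByte('(')
		args = make([]any, 0, len(row))
	}
	for i, v := range row {
		if i > 0 {
			lit.WriteByte(',')
			if prepStmt {
				ph.WriteByte(',')
			}
		}
		fmt.Fprintf(&lit, "'%v'", v) // the real code escapes per column type and SQL mode
		if prepStmt {
			ph.WriteByte('?')
			args = append(args, v)
		}
	}
	lit.WriteByte(')')
	if prepStmt {
		ph.WriteByte(')')
	}
	return lit.String(), ph.String(), args
}

func main() {
	lit, ph, args := encodeRow([]any{1, "a"}, true)
	fmt.Println(lit)  // ('1','a')
	fmt.Println(ph)   // (?,?)
	fmt.Println(args) // [1 a]
}
```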
@@ -665,8 +716,9 @@ rowLoop:
 }

 type stmtTask struct {
-    rows tidbRows
-    stmt string
+    rows tidbRows
+    stmt string
+    values []any
 }

 // WriteBatchRowsToDB write rows in batch mode, which will insert multiple rows like this:
@@ -679,14 +731,23 @@ func (be *tidbBackend) WriteBatchRowsToDB(ctx context.Context, tableName string,
     }
     // Note: we are not going to do interpolation (prepared statements) to avoid
     // complication arise from data length overflow of BIT and BINARY columns
+    var values []any
+    if be.stmtCache != nil && len(rows) > 0 {
+        values = make([]any, 0, len(rows[0].values)*len(rows))
+    }
     stmtTasks := make([]stmtTask, 1)
     for i, row := range rows {
         if i != 0 {
             insertStmt.WriteByte(',')
         }
-        insertStmt.WriteString(row.insertStmt)
+        if be.stmtCache != nil {
+            insertStmt.WriteString(row.preparedInsertStmt)
+            values = append(values, row.values...)
+        } else {
+            insertStmt.WriteString(row.insertStmt)
+        }
     }
-    stmtTasks[0] = stmtTask{rows, insertStmt.String()}
+    stmtTasks[0] = stmtTask{rows, insertStmt.String(), values}
     return be.execStmts(ctx, stmtTasks, tableName, true)
 }
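The batch path above appends one placeholder group per row and flattens every row's values into a single argument slice. Here is a generic database/sql sketch of that flow under assumed table and column names (the PR itself fetches the statement from its cache rather than preparing inline).

```go
package sketch

import (
	"context"
	"database/sql"
	"strings"
)

// insertBatch prepares one multi-row INSERT and executes it with the
// flattened arguments of all rows, mirroring the shape of WriteBatchRowsToDB.
func insertBatch(ctx context.Context, db *sql.DB, rows [][]any) error {
	if len(rows) == 0 {
		return nil
	}
	var sb strings.Builder
	sb.WriteString("INSERT INTO `db`.`t` (c1,c2) VALUES ")
	args := make([]any, 0, len(rows)*len(rows[0]))
	for i, row := range rows {
		if i != 0 {
			sb.WriteByte(',')
		}
		sb.WriteString("(?,?)") // one placeholder group per row
		args = append(args, row...)
	}
	stmt, err := db.PrepareContext(ctx, sb.String())
	if err != nil {
		return err
	}
	defer stmt.Close()
	_, err = stmt.ExecContext(ctx, args...)
	return err
}
```

With two rows (1, "a") and (2, "b") this executes INSERT INTO `db`.`t` (c1,c2) VALUES (?,?),(?,?) with arguments [1 a 2 b]; because the statement text only depends on the row count, repeated batches of the same size can reuse one cached prepared statement.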
@@ -715,8 +776,12 @@ func (be *tidbBackend) WriteRowsToDB(ctx context.Context, tableName string, colu
     for _, row := range rows {
         var finalInsertStmt strings.Builder
         finalInsertStmt.WriteString(is)
-        finalInsertStmt.WriteString(row.insertStmt)
-        stmtTasks = append(stmtTasks, stmtTask{[]tidbRow{row}, finalInsertStmt.String()})
+        if be.stmtCache != nil {
+            finalInsertStmt.WriteString(row.preparedInsertStmt)
+        } else {
+            finalInsertStmt.WriteString(row.insertStmt)
+        }
+        stmtTasks = append(stmtTasks, stmtTask{[]tidbRow{row}, finalInsertStmt.String(), row.values})
     }
     return be.execStmts(ctx, stmtTasks, tableName, false)
 }
@@ -754,8 +819,34 @@ stmtLoop:
         err error
     )
     for i := 0; i < writeRowsMaxRetryTimes; i++ {
-        stmt := stmtTask.stmt
-        result, err = be.db.ExecContext(ctx, stmt)
+        query := stmtTask.stmt
+        if be.stmtCache != nil {
+            var prepStmt *sql.Stmt
+            key := &stmtKey{query: query}
+            be.stmtCacheMutex.RLock()
+            stmt, ok := be.stmtCache.Get(key)
+            be.stmtCacheMutex.RUnlock()
+            if ok {
+                prepStmt = stmt.(*sql.Stmt)
+            } else if stmt, err := be.db.PrepareContext(ctx, query); err == nil {
+                be.stmtCacheMutex.Lock()
+                // check again if the key is already in the cache
+                // to avoid override existing stmt without closing it
+                if cachedStmt, ok := be.stmtCache.Get(key); !ok {
+                    prepStmt = stmt
+                    be.stmtCache.Put(key, stmt)
+                } else {
+                    prepStmt = cachedStmt.(*sql.Stmt)
+                    stmt.Close()
+                }
+                be.stmtCacheMutex.Unlock()
+            } else {
+                return errors.Trace(err)
+            }
+            result, err = prepStmt.ExecContext(ctx, stmtTask.values...)
+        } else {
+            result, err = be.db.ExecContext(ctx, query)
+        }
         if err == nil {
             affected, err2 := result.RowsAffected()
             if err2 != nil {
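The lookup above takes a read lock on the fast path and re-checks the key under the write lock, so two goroutines preparing the same text concurrently cannot overwrite (and leak) each other's statement. Here is a stripped-down sketch of just that pattern, with a plain map standing in for the LRU and all names assumed.

```go
package sketch

import (
	"context"
	"database/sql"
	"sync"
)

// stmtPool caches prepared statements by their SQL text.
type stmtPool struct {
	mu    sync.RWMutex
	stmts map[string]*sql.Stmt
}

func newStmtPool() *stmtPool {
	return &stmtPool{stmts: make(map[string]*sql.Stmt)}
}

// get returns a cached statement for query, preparing it on a miss. The
// second lookup under the write lock avoids overwriting a statement that
// another goroutine prepared in the meantime, which would leak it.
func (p *stmtPool) get(ctx context.Context, db *sql.DB, query string) (*sql.Stmt, error) {
	p.mu.RLock()
	stmt, ok := p.stmts[query]
	p.mu.RUnlock()
	if ok {
		return stmt, nil
	}
	prepared, err := db.PrepareContext(ctx, query)
	if err != nil {
		return nil, err
	}
	p.mu.Lock()
	defer p.mu.Unlock()
	if cached, ok := p.stmts[query]; ok {
		prepared.Close() // lost the race; keep the existing statement
		return cached, nil
	}
	p.stmts[query] = prepared
	return prepared, nil
}
```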
@@ -776,7 +867,7 @@ stmtLoop:

         if !common.IsContextCanceledError(err) {
             log.FromContext(ctx).Error("execute statement failed",
-                zap.Array("rows", stmtTask.rows), zap.String("stmt", redact.Value(stmt)), zap.Error(err))
+                zap.Array("rows", stmtTask.rows), zap.String("stmt", redact.Value(query)), zap.Error(err))
         }
         // It's batch mode, just return the error. Caller will fall back to row-by-row mode.
         if batch {
Review comment on stmtKey.Hash: (I'm not sure why the kvcache.Key interface calls it Hash. Hash usually means a fixed-length digest.)
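For what that interface expects, a tiny sketch of a query-keyed cache key (a generic stand-in, not TiDB's actual kvcache definition): Hash only has to return bytes that identify the entry, so the raw query text suffices and no fixed-length digest is involved.

```go
package sketch

// Key is what an LRU cache of this style typically requires: something that
// can yield stable bytes to index the entry by.
type Key interface {
	Hash() []byte
}

// queryKey identifies a cache entry by the full SQL text.
type queryKey struct {
	query string
}

// Hash returns the query bytes verbatim; despite the name, it acts as an
// identity key rather than a digest (the PR's stmtKey uses hack.Slice to
// avoid copying the string).
func (k queryKey) Hash() []byte {
	return []byte(k.query)
}
```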