From bb2e845f7125335b772cf403c94aa89384830276 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 2 Jun 2023 00:46:42 +0800 Subject: [PATCH] lightning: fix risk of OOM (#40443) (#44333) close pingcap/tidb#40400 --- br/pkg/lightning/mydump/BUILD.bazel | 1 + br/pkg/lightning/mydump/csv_parser.go | 21 +++++++++++++++- br/pkg/lightning/mydump/csv_parser_test.go | 24 +++++++++++++++++++ .../errData/db-schema-create.sql | 1 + .../lightning_csv/errData/db.test-schema.sql | 1 + br/tests/lightning_csv/errData/db.test.1.csv | 3 +++ br/tests/lightning_csv/run.sh | 8 +++++++ config/config.go | 2 ++ tidb-server/main.go | 2 +- 9 files changed, 61 insertions(+), 2 deletions(-) create mode 100755 br/tests/lightning_csv/errData/db-schema-create.sql create mode 100755 br/tests/lightning_csv/errData/db.test-schema.sql create mode 100755 br/tests/lightning_csv/errData/db.test.1.csv diff --git a/br/pkg/lightning/mydump/BUILD.bazel b/br/pkg/lightning/mydump/BUILD.bazel index 6bf020e2feb6b..a4aa1626afc46 100644 --- a/br/pkg/lightning/mydump/BUILD.bazel +++ b/br/pkg/lightning/mydump/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "//br/pkg/lightning/metric", "//br/pkg/lightning/worker", "//br/pkg/storage", + "//config", "//parser/mysql", "//types", "//util/filter", diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index 96de51bd49c73..b7d6c6fc21903 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/metric" "github.com/pingcap/tidb/br/pkg/lightning/worker" + tidbconfig "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/mathutil" ) @@ -33,8 +34,14 @@ var ( errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") + // LargestEntryLimit is the max size for reading file to buf + LargestEntryLimit int ) +func init() { + LargestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit +} + // CSVParser is basically a copy of encoding/csv, but special-cased for MySQL-like input. type CSVParser struct { blockParser @@ -336,6 +343,9 @@ func (parser *CSVParser) readUntil(chars *byteSet) ([]byte, byte, error) { var buf []byte for { buf = append(buf, parser.buf...) + if len(buf) > LargestEntryLimit { + return buf, 0, errors.New("size of row cannot exceed the max value of txn-entry-size-limit") + } parser.buf = nil if err := parser.readBlock(); err != nil || len(parser.buf) == 0 { if err == nil { @@ -447,9 +457,18 @@ outside: func (parser *CSVParser) readQuotedField() error { for { + prevPos := parser.pos content, terminator, err := parser.readUntil(&parser.quoteByteSet) - err = parser.replaceEOF(err, errUnterminatedQuotedField) if err != nil { + if errors.Cause(err) == io.EOF { + // return the position of quote to the caller. + // because we return an error here, the parser won't + // use the `pos` again, so it's safe to modify it here. + parser.pos = prevPos - 1 + // set buf to parser.buf in order to print err log + parser.buf = content + err = parser.replaceEOF(err, errUnterminatedQuotedField) + } return err } parser.recordBuffer = append(parser.recordBuffer, content...) diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go index 2696a6909c96c..da06c15ed39d9 100644 --- a/br/pkg/lightning/mydump/csv_parser_test.go +++ b/br/pkg/lightning/mydump/csv_parser_test.go @@ -1,6 +1,7 @@ package mydump_test import ( + "bytes" "context" "encoding/csv" "fmt" @@ -680,6 +681,29 @@ func TestConsecutiveFields(t *testing.T) { }) } +func TestTooLargeRow(t *testing.T) { + cfg := config.MydumperRuntime{ + CSV: config.CSVConfig{ + Separator: ",", + Delimiter: `"`, + }, + } + var testCase bytes.Buffer + testCase.WriteString("a,b,c,d") + // WARN: will take up 10KB memory here. + mydump.LargestEntryLimit = 10 * 1024 + for i := 0; i < mydump.LargestEntryLimit; i++ { + testCase.WriteByte('d') + } + charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace) + require.NoError(t, err) + parser, err := mydump.NewCSVParser(context.Background(), &cfg.CSV, mydump.NewStringReader(testCase.String()), int64(config.ReadBlockSize), ioWorkers, false, charsetConvertor) + require.NoError(t, err) + e := parser.ReadRow() + require.Error(t, e) + require.Contains(t, e.Error(), "size of row cannot exceed the max value of txn-entry-size-limit") +} + func TestSpecialChars(t *testing.T) { cfg := config.MydumperRuntime{ CSV: config.CSVConfig{Separator: ",", Delimiter: `"`}, diff --git a/br/tests/lightning_csv/errData/db-schema-create.sql b/br/tests/lightning_csv/errData/db-schema-create.sql new file mode 100755 index 0000000000000..6adfeca7f7dab --- /dev/null +++ b/br/tests/lightning_csv/errData/db-schema-create.sql @@ -0,0 +1 @@ +create database if not exists db; diff --git a/br/tests/lightning_csv/errData/db.test-schema.sql b/br/tests/lightning_csv/errData/db.test-schema.sql new file mode 100755 index 0000000000000..955632c7761b2 --- /dev/null +++ b/br/tests/lightning_csv/errData/db.test-schema.sql @@ -0,0 +1 @@ +create table test(a int primary key, b int, c int, d int); diff --git a/br/tests/lightning_csv/errData/db.test.1.csv b/br/tests/lightning_csv/errData/db.test.1.csv new file mode 100755 index 0000000000000..2e8450c25786e --- /dev/null +++ b/br/tests/lightning_csv/errData/db.test.1.csv @@ -0,0 +1,3 @@ +1,2,3,4 +2,10,4,5 +1111,",7,8 diff --git a/br/tests/lightning_csv/run.sh b/br/tests/lightning_csv/run.sh index 83c4917b4b76c..682bc55b08e26 100755 --- a/br/tests/lightning_csv/run.sh +++ b/br/tests/lightning_csv/run.sh @@ -41,3 +41,11 @@ for BACKEND in tidb local; do check_not_contains 'id:' done + +set +e +run_lightning --backend local -d "tests/$TEST_NAME/errData" --log-file "$TEST_DIR/lightning-err.log" 2>/dev/null +set -e +# err content presented +grep ",7,8" "$TEST_DIR/lightning-err.log" +# pos should not set to end +grep "[\"syntax error\"] [pos=22]" "$TEST_DIR/lightning-err.log" \ No newline at end of file diff --git a/config/config.go b/config/config.go index f4b353156698b..1ef50edb01fd5 100644 --- a/config/config.go +++ b/config/config.go @@ -46,6 +46,8 @@ import ( // Config number limitations const ( MaxLogFileSize = 4096 // MB + // MaxTxnEntrySize is the max value of TxnEntrySizeLimit. + MaxTxnEntrySizeLimit = 120 * 1024 * 1024 // 120MB // DefTxnEntrySizeLimit is the default value of TxnEntrySizeLimit. DefTxnEntrySizeLimit = 6 * 1024 * 1024 // DefTxnTotalSizeLimit is the default value of TxnTxnTotalSizeLimit. diff --git a/tidb-server/main.go b/tidb-server/main.go index 82a614486e479..6d2b432a36a9c 100644 --- a/tidb-server/main.go +++ b/tidb-server/main.go @@ -658,7 +658,7 @@ func setGlobalVars() { } else { kv.TxnTotalSizeLimit = cfg.Performance.TxnTotalSizeLimit } - if cfg.Performance.TxnEntrySizeLimit > 120*1024*1024 { + if cfg.Performance.TxnEntrySizeLimit > config.MaxTxnEntrySizeLimit { log.Fatal("cannot set txn entry size limit larger than 120M") } kv.TxnEntrySizeLimit = cfg.Performance.TxnEntrySizeLimit