From a4f5d31e96dee93a20e1872f5dff982aa5101694 Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Sat, 29 Apr 2023 21:51:01 +0800 Subject: [PATCH 01/10] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E6=B5=81?= =?UTF-8?q?=E6=8E=A7=E7=89=B9=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_USER.md | 6 +- datax/core/taskgroup/task_execer.go | 2 +- datax/core/transport/channel/channel.go | 39 +++++++-- datax/core/transport/channel/channel_test.go | 84 ++++++++++++++++++- .../transport/exchange/record_exchanger.go | 14 ++-- .../exchange/record_exchanger_test.go | 2 +- go.mod | 1 + go.sum | 2 + 8 files changed, 130 insertions(+), 20 deletions(-) diff --git a/README_USER.md b/README_USER.md index 04a8abd..d9914a6 100644 --- a/README_USER.md +++ b/README_USER.md @@ -17,9 +17,9 @@ make release ### windows #### 依赖 -1.需要mingw-w64 with gcc 7.2.0以上的环境进行编译 -2.golang 1.16以及以上 -3.最小编译环境为win7 +1. 需要mingw-w64 with gcc 7.2.0以上的环境进行编译 +2. golang 1.16以及以上 +3. 
最小编译环境为win7 #### 构建 ```bash diff --git a/datax/core/taskgroup/task_execer.go b/datax/core/taskgroup/task_execer.go index ca43bfb..85667e5 100644 --- a/datax/core/taskgroup/task_execer.go +++ b/datax/core/taskgroup/task_execer.go @@ -63,7 +63,7 @@ func newTaskExecer(ctx context.Context, taskConf *config.JSON, ctx: ctx, attemptCount: atomic.NewInt32(int32(attemptCount)), } - t.channel, err = channel.NewChannel(ctx) + t.channel, err = channel.NewChannel(ctx, nil) if err != nil { return nil, err } diff --git a/datax/core/transport/channel/channel.go b/datax/core/transport/channel/channel.go index a8a0113..c748338 100644 --- a/datax/core/transport/channel/channel.go +++ b/datax/core/transport/channel/channel.go @@ -17,18 +17,38 @@ package channel import ( "context" + "github.com/Breeze0806/go-etl/config" + coreconst "github.com/Breeze0806/go-etl/datax/common/config/core" "github.com/Breeze0806/go-etl/element" + "golang.org/x/time/rate" ) //Channel 通道 type Channel struct { + limiter *rate.Limiter records *element.RecordChan + ctx context.Context } //NewChannel 创建通道 -func NewChannel(ctx context.Context) (*Channel, error) { +func NewChannel(ctx context.Context, conf *config.JSON) (*Channel, error) { + r := -1 + b := -1.0 + if conf != nil { + b = conf.GetFloat64OrDefaullt(coreconst.DataxJobSettingSpeedByte, -1.0) + r = int(conf.GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedRecord, -1)) + } + var limiter *rate.Limiter + if b > 0 { + limiter = rate.NewLimiter(rate.Limit(b), int(b)) + } + if r < 0 { + r = 0 + } return &Channel{ - records: element.NewRecordChan(ctx), + records: element.NewRecordChanBuffer(ctx, r), + ctx: ctx, + limiter: limiter, }, nil } @@ -43,8 +63,14 @@ func (c *Channel) IsEmpty() bool { } //Push 将记录r加入通道 -func (c *Channel) Push(r element.Record) int { - return c.records.PushBack(r) +func (c *Channel) Push(r element.Record) (n int, err error) { + if c.limiter != nil { + if err = c.limiter.WaitN(c.ctx, int(r.ByteSize())); err != nil { + return 0, err + } + 
} + + return c.records.PushBack(r), nil } //Pop 将记录弹出,当通道中不存在记录,就会返回false @@ -68,6 +94,7 @@ func (c *Channel) Close() { } //PushTerminate 加入终止记录 -func (c *Channel) PushTerminate() int { - return c.Push(element.GetTerminateRecord()) +func (c *Channel) PushTerminate() (n int) { + n, _ = c.Push(element.GetTerminateRecord()) + return } diff --git a/datax/core/transport/channel/channel_test.go b/datax/core/transport/channel/channel_test.go index 47e5f53..dea60d0 100644 --- a/datax/core/transport/channel/channel_test.go +++ b/datax/core/transport/channel/channel_test.go @@ -17,19 +17,30 @@ package channel import ( "context" "fmt" + "sync" "testing" + "github.com/Breeze0806/go-etl/config" "github.com/Breeze0806/go-etl/element" ) +type mockRecord struct { + *element.DefaultRecord + + n int64 +} + +func (m *mockRecord) ByteSize() int64 { + return m.n +} func TestChannel_PushPop(t *testing.T) { - ch, _ := NewChannel(context.TODO()) + ch, _ := NewChannel(context.TODO(), nil) defer ch.Close() if !ch.IsEmpty() { t.Errorf("IsEmpty() = %v want true", ch.IsEmpty()) } - if n := ch.Push(element.NewDefaultRecord()); n != 1 { + if n, _ := ch.Push(element.NewDefaultRecord()); n != 1 { t.Errorf("Push() = %v want 1", n) } if n := ch.PushTerminate(); n != 2 { @@ -41,7 +52,7 @@ func TestChannel_PushPop(t *testing.T) { } func TestChannel_PushAllPopAll(t *testing.T) { - ch, _ := NewChannel(context.TODO()) + ch, _ := NewChannel(context.TODO(), nil) defer ch.Close() if !ch.IsEmpty() { t.Errorf("IsEmpty() = %v want true", ch.IsEmpty()) @@ -63,3 +74,70 @@ func TestChannel_PushAllPopAll(t *testing.T) { t.Errorf("PopAll() = %v want nil", err) } } + +func TestChannelWithRateLimit(t *testing.T) { + conf, _ := config.NewJSONFromString(`{ + "job":{ + "setting":{ + "speed":{ + "byte":10000, + "record":10 + } + } + } + }`) + want := 1000 + ch, _ := NewChannel(context.TODO(), conf) + defer ch.Close() + var wg sync.WaitGroup + wg.Add(1) + n := 0 + go func() { + defer wg.Done() + for { + r, _ := ch.Pop() + 
switch r.(type) { + case *element.TerminateRecord: + return + } + n++ + } + }() + for i := 0; i < want; i++ { + ch.Push(&mockRecord{ + DefaultRecord: element.NewDefaultRecord(), + n: int64(100), + }) + } + ch.PushTerminate() + wg.Wait() + + if n != want { + t.Errorf("want:%v n:%v", want, n) + } +} + +func TestChannelWithRateLimit_Err(t *testing.T) { + conf, _ := config.NewJSONFromString(`{ + "job":{ + "setting":{ + "speed":{ + "byte":10000, + "record":10 + } + } + } + }`) + want := 1000 + ch, _ := NewChannel(context.TODO(), conf) + defer ch.Close() + for i := 0; i < want; i++ { + _, err := ch.Push(&mockRecord{ + DefaultRecord: element.NewDefaultRecord(), + n: int64(100000), + }) + if err == nil { + t.Fatal("want error back") + } + } +} diff --git a/datax/core/transport/exchange/record_exchanger.go b/datax/core/transport/exchange/record_exchanger.go index 4885fab..75bb642 100644 --- a/datax/core/transport/exchange/record_exchanger.go +++ b/datax/core/transport/exchange/record_exchanger.go @@ -51,7 +51,7 @@ func NewRecordExchanger(ch *channel.Channel, tran transform.Transformer) *Record //GetFromReader 从Reader中获取记录 //当交换器关闭,通道为空或者收到终止消息也会报错 -func (r *RecordExchanger) GetFromReader() (element.Record, error) { +func (r *RecordExchanger) GetFromReader() (newRecord element.Record, err error) { if r.isShutdown { return nil, ErrShutdown } @@ -64,7 +64,10 @@ func (r *RecordExchanger) GetFromReader() (element.Record, error) { case *element.TerminateRecord: return nil, ErrTerminate default: - return record, nil + if newRecord, err = r.tran.DoTransform(record); err != nil { + return nil, err + } + return } } @@ -85,10 +88,9 @@ func (r *RecordExchanger) SendWriter(record element.Record) (err error) { if r.isShutdown { return ErrShutdown } - var newRecord element.Record - if newRecord, err = r.tran.DoTransform(record); err == nil { - r.ch.Push(newRecord) - } + + r.ch.Push(record) + return } diff --git a/datax/core/transport/exchange/record_exchanger_test.go 
b/datax/core/transport/exchange/record_exchanger_test.go index 38aab9a..b307920 100644 --- a/datax/core/transport/exchange/record_exchanger_test.go +++ b/datax/core/transport/exchange/record_exchanger_test.go @@ -63,7 +63,7 @@ func (m *mockRecord) String() string { return "" } func TestRecordExchanger(t *testing.T) { - ch, _ := channel.NewChannel(context.TODO()) + ch, _ := channel.NewChannel(context.TODO(), nil) defer ch.Close() re := NewRecordExchangerWithoutTransformer(ch) defer re.Shutdown() diff --git a/go.mod b/go.mod index ed323af..dddb6a8 100644 --- a/go.mod +++ b/go.mod @@ -16,4 +16,5 @@ require ( github.com/xuri/excelize/v2 v2.6.1 go.uber.org/atomic v1.9.0 golang.org/x/text v0.4.0 + golang.org/x/time v0.3.0 ) diff --git a/go.sum b/go.sum index 0011fe4..32e15c1 100644 --- a/go.sum +++ b/go.sum @@ -117,6 +117,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= From 3708a4c7167db2453362e6f66acdbac20e31c075 Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Tue, 2 May 2023 00:40:52 +0800 Subject: [PATCH 02/10] =?UTF-8?q?feat=20&=20docs:=20=E6=96=B0=E5=A2=9Echan?= =?UTF-8?q?nel=E7=BB=9F=E8=AE=A1=E4=BF=A1=E6=81=AF,=E5=8F=98=E6=9B=B4reade?= =?UTF-8?q?r=EF=BC=8Cwriter=E7=9A=84rdbm=E5=88=B0dbms,=20=E6=96=B0?= 
=?UTF-8?q?=E5=A2=9E=E5=88=87=E5=88=86=E4=B8=BB=E9=94=AE=E9=A2=84=E7=BD=AE?= =?UTF-8?q?=E6=9C=80=E5=A4=A7=E5=80=BC=E5=92=8C=E6=9C=80=E5=B0=8F=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README_USER.md | 10 +- cmd/datax/examples/csvpostgres/config.json | 4 +- cmd/datax/examples/db2/config.json | 4 +- cmd/datax/examples/mysql/config.json | 4 +- cmd/datax/examples/oracle/config.json | 4 +- cmd/datax/examples/postgres/config.json | 4 +- cmd/datax/examples/postgrescsv/config.json | 4 +- cmd/datax/examples/postgresxlsx/config.json | 4 +- cmd/datax/examples/prePostSql/mysql.json | 4 +- cmd/datax/examples/split/csv.json | 4 +- cmd/datax/examples/split/mysql.json | 4 +- cmd/datax/examples/sqlserver/config.json | 4 +- cmd/datax/examples/xlsxpostgres/config.json | 12 +- datax/common/plugin/job.go | 15 + datax/common/plugin/job_test.go | 29 ++ datax/core/job/container.go | 2 + datax/core/taskgroup/container.go | 4 +- datax/core/transport/channel/channel.go | 52 ++- datax/core/transport/channel/channel_test.go | 11 +- datax/plugin/reader/db2/README.md | 21 +- datax/plugin/reader/db2/job.go | 4 +- datax/plugin/reader/db2/reader.go | 6 +- datax/plugin/reader/db2/task.go | 6 +- datax/plugin/reader/{rdbm => dbms}/config.go | 2 +- .../reader/{rdbm => dbms}/db_handler.go | 2 +- datax/plugin/reader/{rdbm => dbms}/job.go | 64 +++- .../plugin/reader/{rdbm => dbms}/job_test.go | 32 +- datax/plugin/reader/{rdbm => dbms}/log.go | 2 +- .../plugin/reader/{rdbm => dbms}/parameter.go | 6 +- .../reader/{rdbm => dbms}/parameter_test.go | 2 +- datax/plugin/reader/{rdbm => dbms}/querier.go | 2 +- .../{rdbm => dbms}/querier_help_test.go | 15 +- datax/plugin/reader/{rdbm => dbms}/split.go | 97 +++++- .../reader/{rdbm => dbms}/split_test.go | 321 +++++++++++++++++- datax/plugin/reader/{rdbm => dbms}/task.go | 2 +- .../plugin/reader/{rdbm => dbms}/task_test.go | 2 +- datax/plugin/reader/mysql/README.md | 19 +- datax/plugin/reader/mysql/job.go 
| 4 +- datax/plugin/reader/mysql/reader.go | 6 +- datax/plugin/reader/mysql/task.go | 6 +- datax/plugin/reader/oracle/README.md | 19 +- datax/plugin/reader/oracle/job.go | 4 +- datax/plugin/reader/oracle/reader.go | 6 +- datax/plugin/reader/oracle/task.go | 6 +- datax/plugin/reader/postgres/README.md | 19 +- datax/plugin/reader/postgres/job.go | 4 +- datax/plugin/reader/postgres/reader.go | 8 +- datax/plugin/reader/postgres/task.go | 6 +- datax/plugin/reader/sqlserver/README.md | 19 +- datax/plugin/reader/sqlserver/job.go | 4 +- datax/plugin/reader/sqlserver/reader.go | 6 +- datax/plugin/reader/sqlserver/task.go | 6 +- datax/plugin/writer/db2/README.md | 2 +- datax/plugin/writer/db2/job.go | 4 +- datax/plugin/writer/db2/task.go | 10 +- datax/plugin/writer/db2/task_test.go | 6 +- datax/plugin/writer/db2/writer.go | 10 +- .../writer/{rdbm => dbms}/batch_writer.go | 2 +- .../{rdbm => dbms}/batch_writer_test.go | 2 +- datax/plugin/writer/{rdbm => dbms}/config.go | 12 +- .../writer/{rdbm => dbms}/config_test.go | 16 +- .../writer/{rdbm => dbms}/db_handler.go | 6 +- datax/plugin/writer/{rdbm => dbms}/execer.go | 2 +- .../writer/{rdbm => dbms}/execer_help_test.go | 8 +- datax/plugin/writer/{rdbm => dbms}/job.go | 2 +- .../plugin/writer/{rdbm => dbms}/job_test.go | 2 +- datax/plugin/writer/{rdbm => dbms}/log.go | 2 +- datax/plugin/writer/{rdbm => dbms}/task.go | 2 +- .../plugin/writer/{rdbm => dbms}/task_test.go | 2 +- datax/plugin/writer/mysql/README.md | 2 +- datax/plugin/writer/mysql/job.go | 4 +- datax/plugin/writer/mysql/task.go | 14 +- datax/plugin/writer/mysql/task_test.go | 16 +- datax/plugin/writer/mysql/writer.go | 10 +- datax/plugin/writer/oracle/README.md | 2 +- datax/plugin/writer/oracle/job.go | 4 +- datax/plugin/writer/oracle/task.go | 10 +- datax/plugin/writer/oracle/task_test.go | 6 +- datax/plugin/writer/oracle/writer.go | 10 +- datax/plugin/writer/postgres/README.md | 2 +- datax/plugin/writer/postgres/job.go | 4 +- datax/plugin/writer/postgres/task.go | 
12 +- datax/plugin/writer/postgres/task_test.go | 8 +- datax/plugin/writer/postgres/writer.go | 10 +- datax/plugin/writer/sqlserver/README.md | 2 +- datax/plugin/writer/sqlserver/job.go | 4 +- datax/plugin/writer/sqlserver/task.go | 12 +- datax/plugin/writer/sqlserver/task_test.go | 8 +- datax/plugin/writer/sqlserver/writer.go | 10 +- 89 files changed, 890 insertions(+), 257 deletions(-) rename datax/plugin/reader/{rdbm => dbms}/config.go (99%) rename datax/plugin/reader/{rdbm => dbms}/db_handler.go (99%) rename datax/plugin/reader/{rdbm => dbms}/job.go (79%) rename datax/plugin/reader/{rdbm => dbms}/job_test.go (89%) rename datax/plugin/reader/{rdbm => dbms}/log.go (98%) rename datax/plugin/reader/{rdbm => dbms}/parameter.go (98%) rename datax/plugin/reader/{rdbm => dbms}/parameter_test.go (99%) rename datax/plugin/reader/{rdbm => dbms}/querier.go (99%) rename datax/plugin/reader/{rdbm => dbms}/querier_help_test.go (94%) rename datax/plugin/reader/{rdbm => dbms}/split.go (75%) rename datax/plugin/reader/{rdbm => dbms}/split_test.go (76%) rename datax/plugin/reader/{rdbm => dbms}/task.go (99%) rename datax/plugin/reader/{rdbm => dbms}/task_test.go (99%) rename datax/plugin/writer/{rdbm => dbms}/batch_writer.go (99%) rename datax/plugin/writer/{rdbm => dbms}/batch_writer_test.go (99%) rename datax/plugin/writer/{rdbm => dbms}/config.go (94%) rename datax/plugin/writer/{rdbm => dbms}/config_test.go (96%) rename datax/plugin/writer/{rdbm => dbms}/db_handler.go (93%) rename datax/plugin/writer/{rdbm => dbms}/execer.go (99%) rename datax/plugin/writer/{rdbm => dbms}/execer_help_test.go (97%) rename datax/plugin/writer/{rdbm => dbms}/job.go (99%) rename datax/plugin/writer/{rdbm => dbms}/job_test.go (99%) rename datax/plugin/writer/{rdbm => dbms}/log.go (98%) rename datax/plugin/writer/{rdbm => dbms}/task.go (99%) rename datax/plugin/writer/{rdbm => dbms}/task_test.go (99%) diff --git a/README_USER.md b/README_USER.md index d9914a6..5f50451 100644 --- a/README_USER.md 
+++ b/README_USER.md @@ -129,14 +129,18 @@ datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } } } ``` +#### 流控配置 + +之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作。 + #### 源目的配置向导文件 源目的配置向导文件是csv文件,每行配置可以配置如下: @@ -295,7 +299,7 @@ datax -c examples/postgresxlsx/config.json #### 使用切分键 -这里假设数据按切分键分布是均匀的,合理使用这样的切分键可以使同步更快。 +这里假设数据按切分键分布是均匀的,合理使用这样的切分键可以使同步更快,另外为了加快对最大值和最小值的查询,这里对于大表可以预设最大最小值 ##### 测试方式 - 使用程序生成mysql数据产生split.csv diff --git a/cmd/datax/examples/csvpostgres/config.json b/cmd/datax/examples/csvpostgres/config.json index 45885d5..4f02526 100644 --- a/cmd/datax/examples/csvpostgres/config.json +++ b/cmd/datax/examples/csvpostgres/config.json @@ -64,8 +64,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/db2/config.json b/cmd/datax/examples/db2/config.json index 793f6bf..db1545b 100644 --- a/cmd/datax/examples/db2/config.json +++ b/cmd/datax/examples/db2/config.json @@ -64,8 +64,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/mysql/config.json b/cmd/datax/examples/mysql/config.json index e5a1d0d..1c09f4c 100644 --- a/cmd/datax/examples/mysql/config.json +++ b/cmd/datax/examples/mysql/config.json @@ -65,8 +65,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/oracle/config.json b/cmd/datax/examples/oracle/config.json index a955db1..3a4ff80 100644 --- a/cmd/datax/examples/oracle/config.json +++ b/cmd/datax/examples/oracle/config.json @@ -64,8 +64,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/postgres/config.json 
b/cmd/datax/examples/postgres/config.json index babe55a..7fc0ead 100644 --- a/cmd/datax/examples/postgres/config.json +++ b/cmd/datax/examples/postgres/config.json @@ -66,8 +66,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/postgrescsv/config.json b/cmd/datax/examples/postgrescsv/config.json index 7dca417..ac50220 100644 --- a/cmd/datax/examples/postgrescsv/config.json +++ b/cmd/datax/examples/postgrescsv/config.json @@ -63,8 +63,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/postgresxlsx/config.json b/cmd/datax/examples/postgresxlsx/config.json index d89b2f0..2cc8bdc 100644 --- a/cmd/datax/examples/postgresxlsx/config.json +++ b/cmd/datax/examples/postgresxlsx/config.json @@ -66,8 +66,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/prePostSql/mysql.json b/cmd/datax/examples/prePostSql/mysql.json index 18726bc..6a7b9ea 100644 --- a/cmd/datax/examples/prePostSql/mysql.json +++ b/cmd/datax/examples/prePostSql/mysql.json @@ -69,8 +69,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/split/csv.json b/cmd/datax/examples/split/csv.json index 804af1b..2928a1c 100644 --- a/cmd/datax/examples/split/csv.json +++ b/cmd/datax/examples/split/csv.json @@ -56,8 +56,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/split/mysql.json b/cmd/datax/examples/split/mysql.json index 40d5ef8..703a752 100644 --- a/cmd/datax/examples/split/mysql.json +++ b/cmd/datax/examples/split/mysql.json @@ -68,8 +68,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git 
a/cmd/datax/examples/sqlserver/config.json b/cmd/datax/examples/sqlserver/config.json index 12ecac3..6ac1797 100644 --- a/cmd/datax/examples/sqlserver/config.json +++ b/cmd/datax/examples/sqlserver/config.json @@ -69,8 +69,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/cmd/datax/examples/xlsxpostgres/config.json b/cmd/datax/examples/xlsxpostgres/config.json index 43ac2d7..1977549 100644 --- a/cmd/datax/examples/xlsxpostgres/config.json +++ b/cmd/datax/examples/xlsxpostgres/config.json @@ -11,14 +11,6 @@ "retryIntervalInMsec":0 } } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } - } } }, "job":{ @@ -67,8 +59,8 @@ ], "setting":{ "speed":{ - "byte":3000, - "record":400, + "byte":0, + "record":1024, "channel":4 } } diff --git a/datax/common/plugin/job.go b/datax/common/plugin/job.go index ae31173..c5f62a9 100644 --- a/datax/common/plugin/job.go +++ b/datax/common/plugin/job.go @@ -17,6 +17,10 @@ package plugin //Job 工作 type Job interface { Plugin + //工作ID + JobID() int64 + //设置工作ID + SetJobID(jobID int64) Collector() JobCollector //todo 工作采集器目前未使用 SetCollector(JobCollector) //todo 设置工作采集器目前未使用 } @@ -25,6 +29,7 @@ type Job interface { type BaseJob struct { *BasePlugin + id int64 collector JobCollector } @@ -35,6 +40,16 @@ func NewBaseJob() *BaseJob { } } +//JobID 工作ID +func (b *BaseJob) JobID() int64 { + return b.id +} + +//SetJobID 设置工作ID +func (b *BaseJob) SetJobID(jobID int64) { + b.id = jobID +} + //Collector 采集器 func (b *BaseJob) Collector() JobCollector { return b.collector diff --git a/datax/common/plugin/job_test.go b/datax/common/plugin/job_test.go index bc9124e..4f32d93 100644 --- a/datax/common/plugin/job_test.go +++ b/datax/common/plugin/job_test.go @@ -58,3 +58,32 @@ func TestBaseJob_SetCollector(t *testing.T) { }) } } + +func TestBaseJob_SetJobID(t *testing.T) { + type args struct { + jobID int64 + } + tests := []struct { + name string + b *BaseJob + 
args args + want int64 + }{ + { + name: "1", + b: &BaseJob{}, + args: args{ + jobID: 10, + }, + want: 10, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.b.SetJobID(tt.args.jobID) + if tt.b.JobID() != tt.want { + t.Errorf("JobID() = %v want %v", tt.b.JobID(), tt.want) + } + }) + } +} diff --git a/datax/core/job/container.go b/datax/core/job/container.go index c825e78..d9ad13a 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -454,6 +454,7 @@ func (c *Container) initReaderJob(collector plugin.JobCollector, readerConfig, w job.SetPluginJobConf(readerConfig) job.SetPeerPluginJobConf(writerConfig) job.SetPeerPluginName(c.writerPluginName) + job.SetJobID(c.jobID) err = job.Init(c.ctx) if err != nil { return @@ -474,6 +475,7 @@ func (c *Container) initWriterJob(collector plugin.JobCollector, readerConfig, w job.SetPluginJobConf(writerConfig) job.SetPeerPluginJobConf(readerConfig) job.SetPeerPluginName(c.readerPluginName) + job.SetJobID(c.jobID) err = job.Init(c.ctx) if err != nil { return diff --git a/datax/core/taskgroup/container.go b/datax/core/taskgroup/container.go index c8002f5..0afc605 100644 --- a/datax/core/taskgroup/container.go +++ b/datax/core/taskgroup/container.go @@ -67,8 +67,8 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err c.retryInterval = time.Duration( c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 10000)) * time.Millisecond c.retryMaxCount = int32(c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 1)) - log.Infof("datax job(%v) taskgruop(%v) sleepInterval: %v retryInterval: %v retryMaxCount: %v", - c.jobID, c.taskGroupID, c.sleepInterval, c.retryInterval, c.retryMaxCount) + log.Infof("datax job(%v) taskgruop(%v) sleepInterval: %v retryInterval: %v retryMaxCount: %v config: %v", + c.jobID, c.taskGroupID, c.sleepInterval, c.retryInterval, c.retryMaxCount, conf) return } diff --git 
a/datax/core/transport/channel/channel.go b/datax/core/transport/channel/channel.go index c748338..3f8bd0e 100644 --- a/datax/core/transport/channel/channel.go +++ b/datax/core/transport/channel/channel.go @@ -16,6 +16,7 @@ package channel import ( "context" + "sync" "github.com/Breeze0806/go-etl/config" coreconst "github.com/Breeze0806/go-etl/datax/common/config/core" @@ -28,6 +29,44 @@ type Channel struct { limiter *rate.Limiter records *element.RecordChan ctx context.Context + stats Stats +} + +//Stats Channel的统计信息 +type Stats struct { + sync.RWMutex + StatsJSON +} + +//StatsJSON Channel的JSON统计信息 +type StatsJSON struct { + TotalByte int64 `json:"totalByte"` + TotalRecord int64 `json:"totalRecord"` + Byte int64 `json:"byte"` + Record int64 `json:"record"` +} + +func (s *Stats) increase(b int64) { + s.Lock() + defer s.Unlock() + s.TotalByte += b + s.Byte += b + s.TotalRecord++ + s.Record++ +} + +func (s *Stats) reduce(b int64) { + s.Lock() + defer s.Unlock() + s.Byte -= b + s.Record-- +} + +//statsJSON 返回json的机构体 +func (s *Stats) statsJSON() StatsJSON { + s.RLock() + defer s.RUnlock() + return s.StatsJSON } //NewChannel 创建通道 @@ -69,13 +108,17 @@ func (c *Channel) Push(r element.Record) (n int, err error) { return 0, err } } - + c.stats.increase(r.ByteSize()) return c.records.PushBack(r), nil } //Pop 将记录弹出,当通道中不存在记录,就会返回false func (c *Channel) Pop() (r element.Record, ok bool) { - return c.records.PopFront() + r, ok = c.records.PopFront() + if r != nil { + c.stats.reduce(r.ByteSize()) + } + return } //PushAll 通过fetchRecord函数加入多条记录 @@ -98,3 +141,8 @@ func (c *Channel) PushTerminate() (n int) { n, _ = c.Push(element.GetTerminateRecord()) return } + +//StatsJSON 返回Channel的统计信息 +func (c *Channel) StatsJSON() StatsJSON { + return c.stats.statsJSON() +} diff --git a/datax/core/transport/channel/channel_test.go b/datax/core/transport/channel/channel_test.go index dea60d0..f936af3 100644 --- a/datax/core/transport/channel/channel_test.go +++ 
b/datax/core/transport/channel/channel_test.go @@ -87,6 +87,7 @@ func TestChannelWithRateLimit(t *testing.T) { } }`) want := 1000 + b := 100 ch, _ := NewChannel(context.TODO(), conf) defer ch.Close() var wg sync.WaitGroup @@ -106,7 +107,7 @@ func TestChannelWithRateLimit(t *testing.T) { for i := 0; i < want; i++ { ch.Push(&mockRecord{ DefaultRecord: element.NewDefaultRecord(), - n: int64(100), + n: int64(b), }) } ch.PushTerminate() @@ -115,6 +116,14 @@ func TestChannelWithRateLimit(t *testing.T) { if n != want { t.Errorf("want:%v n:%v", want, n) } + + if ch.StatsJSON().TotalByte != int64(b*want) { + t.Errorf("TotalByte:%v want:%v", ch.StatsJSON().TotalByte, b*want) + } + + if ch.StatsJSON().TotalRecord != int64(want+1) { + t.Errorf("TotalRecord:%v want:%v", ch.StatsJSON().TotalRecord, want+1) + } } func TestChannelWithRateLimit_Err(t *testing.T) { diff --git a/datax/plugin/reader/db2/README.md b/datax/plugin/reader/db2/README.md index 0e6016b..81c7c5c 100644 --- a/datax/plugin/reader/db2/README.md +++ b/datax/plugin/reader/db2/README.md @@ -8,7 +8,7 @@ DB2Reader插件实现了从DB2 LUW数据库读取数据。在底层实现上,D DB2Reader通过github.com/ibmdb/go_ibm_db使用db2的odbc库连接远程DB2 LUW数据库,并根据用户配置的信息生成查询SQL语句,然后发送到远程DB2 LUW数据库,并将该SQL执行返回结果使用go-etl自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。 -DB2Reader通过使用rdbmreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中DB2采取了storage/database/db2实现的Dialect。 +DB2Reader通过使用dbmsreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中DB2采取了storage/database/db2实现的Dialect。 ## 功能说明 @@ -92,7 +92,24 @@ DB2Reader通过使用rdbmreader中定义的查询流程调用go-etl自定义的s ##### timeAccuracy -- 描述 主要用于配置db2表的时间切分键,主要用于描述时间最小单位,day(日),min(分钟),s(秒),ms(毫秒),us(微秒),ns(纳秒) +- 描述 主要用于配置db2表的时间切分键,主要用于描述时间最小单位,day(日),min(分钟),s(秒),ms(毫秒),us(微秒),ns(纳秒),在range设置默认值是必须有值 +- 必选:否 +- 默认值: 无 + +##### range + +###### type +- 描述 主要用于配置db2表的切分键默认值类型,值为bigInt/string/time,这里会检查表切分键中的类型,请务必确保类型正确。 +- 必选:否 +- 默认值: 无 
+ +###### left +- 描述 主要用于配置db2表的切分键默认最大值 +- 必选:否 +- 默认值: 无 + +###### right +- 描述 主要用于配置db2表的切分键默认最小值 - 必选:否 - 默认值: 无 diff --git a/datax/plugin/reader/db2/job.go b/datax/plugin/reader/db2/job.go index 4931b4e..1886054 100644 --- a/datax/plugin/reader/db2/job.go +++ b/datax/plugin/reader/db2/job.go @@ -15,10 +15,10 @@ package db2 import ( - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/reader/db2/reader.go b/datax/plugin/reader/db2/reader.go index de239a8..4f2b5b6 100644 --- a/datax/plugin/reader/db2/reader.go +++ b/datax/plugin/reader/db2/reader.go @@ -17,7 +17,7 @@ package db2 import ( "github.com/Breeze0806/go-etl/config" spireader "github.com/Breeze0806/go-etl/datax/common/spi/reader" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" ) @@ -34,7 +34,7 @@ func (r *Reader) ResourcesConfig() *config.JSON { //Job 工作 func (r *Reader) Job() spireader.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } @@ -48,7 +48,7 @@ func (r *Reader) Job() spireader.Job { //Task 任务 func (r *Reader) Task() spireader.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/reader/db2/task.go b/datax/plugin/reader/db2/task.go index 68ecf3f..8e68c89 100644 --- a/datax/plugin/reader/db2/task.go +++ 
b/datax/plugin/reader/db2/task.go @@ -18,7 +18,7 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" //db2 storage _ "github.com/Breeze0806/go-etl/storage/database/db2" @@ -26,10 +26,10 @@ import ( //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartRead 开始读 func (t *Task) StartRead(ctx context.Context, sender plugin.RecordSender) (err error) { - return rdbm.StartRead(ctx, rdbm.NewBaseBatchReader(t.Task, "", nil), sender) + return dbms.StartRead(ctx, dbms.NewBaseBatchReader(t.Task, "", nil), sender) } diff --git a/datax/plugin/reader/rdbm/config.go b/datax/plugin/reader/dbms/config.go similarity index 99% rename from datax/plugin/reader/rdbm/config.go rename to datax/plugin/reader/dbms/config.go index e915a74..674d0d4 100644 --- a/datax/plugin/reader/rdbm/config.go +++ b/datax/plugin/reader/dbms/config.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "encoding/json" diff --git a/datax/plugin/reader/rdbm/db_handler.go b/datax/plugin/reader/dbms/db_handler.go similarity index 99% rename from datax/plugin/reader/rdbm/db_handler.go rename to datax/plugin/reader/dbms/db_handler.go index 2e95539..d7f6ddb 100644 --- a/datax/plugin/reader/rdbm/db_handler.go +++ b/datax/plugin/reader/dbms/db_handler.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "database/sql" diff --git a/datax/plugin/reader/rdbm/job.go b/datax/plugin/reader/dbms/job.go similarity index 79% rename from datax/plugin/reader/rdbm/job.go rename to datax/plugin/reader/dbms/job.go index 211c456..8af5222 100644 --- a/datax/plugin/reader/rdbm/job.go +++ b/datax/plugin/reader/dbms/job.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" @@ -85,24 +85,11 @@ func (j *Job) Destroy(ctx context.Context) (err error) { return errors.Wrapf(err, "Close fail") } -//Split 切分 -func (j *Job) Split(ctx context.Context, number int) (configs []*config.JSON, err error) { - if j.Config.GetSplitConfig().Key == "" || number == 1 { - return []*config.JSON{j.PluginJobConf().CloneConfig()}, nil - } - - var splitTable database.Table - param := j.handler.SplitParam(j.Config, j.Querier) - if splitTable, err = j.Querier.FetchTableWithParam(ctx, param); err != nil { - err = errors.Wrapf(err, "FetchTableWithParam fail") - return - } - - var minColumn element.Column +func (j *Job) fetchMin(ctx context.Context, splitTable database.Table) (c element.Column, err error) { minHandler := database.NewBaseFetchHandler(func() (element.Record, error) { return element.NewDefaultRecord(), nil }, func(r element.Record) (err error) { - minColumn, err = r.GetByIndex(0) + c, err = r.GetByIndex(0) return nil }) minParam := j.handler.MinParam(j.Config, splitTable) @@ -110,12 +97,14 @@ func (j *Job) Split(ctx context.Context, number int) (configs []*config.JSON, er err = errors.Wrapf(err, "FetchTableWithParam fail") return } + return +} - var maxColumn element.Column +func (j *Job) fetchMax(ctx context.Context, splitTable database.Table) (c element.Column, err error) { maxHandler := database.NewBaseFetchHandler(func() (element.Record, error) { return element.NewDefaultRecord(), nil }, func(r element.Record) error { - maxColumn, err = 
r.GetByIndex(0) + c, err = r.GetByIndex(0) return nil }) @@ -124,6 +113,45 @@ func (j *Job) Split(ctx context.Context, number int) (configs []*config.JSON, er err = errors.Wrapf(err, "FetchTableWithParam fail") return } + return +} + +//Split 切分 +func (j *Job) Split(ctx context.Context, number int) (configs []*config.JSON, err error) { + if j.Config.GetSplitConfig().Key == "" || number == 1 { + return []*config.JSON{j.PluginJobConf().CloneConfig()}, nil + } + + if j.Config.GetSplitConfig().Range.Type == "" { + return j.split(ctx, number, j) + } + conf := j.Config.GetSplitConfig() + return j.split(ctx, number, &conf) +} + +func (j *Job) split(ctx context.Context, number int, fetcher splitRangeFetcher) (configs []*config.JSON, err error) { + var splitTable database.Table + log.Debugf("jobID: %v start to split", j.JobID()) + + param := j.handler.SplitParam(j.Config, j.Querier) + if splitTable, err = j.Querier.FetchTableWithParam(ctx, param); err != nil { + err = errors.Wrapf(err, "FetchTableWithParam fail") + return + } + log.Debugf("jobID: %v fetch split table end", j.JobID()) + + var minColumn element.Column + if minColumn, err = fetcher.fetchMin(ctx, splitTable); err != nil { + err = errors.Wrapf(err, "fetchMin fail") + return + } + log.Debugf("jobID: %v split fetchMin = %v", j.JobID(), minColumn) + var maxColumn element.Column + if maxColumn, err = fetcher.fetchMax(ctx, splitTable); err != nil { + err = errors.Wrapf(err, "fetchMax fail") + return + } + log.Debugf("jobID: %v split fetchMax = %v", j.JobID(), maxColumn) ranges, err := split(minColumn, maxColumn, number, j.Config.GetSplitConfig().TimeAccuracy, splitTable.Fields()[0]) diff --git a/datax/plugin/reader/rdbm/job_test.go b/datax/plugin/reader/dbms/job_test.go similarity index 89% rename from datax/plugin/reader/rdbm/job_test.go rename to datax/plugin/reader/dbms/job_test.go index e26a525..b23326f 100644 --- a/datax/plugin/reader/rdbm/job_test.go +++ b/datax/plugin/reader/dbms/job_test.go @@ -12,7 +12,7 @@ 
// See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" @@ -346,6 +346,36 @@ func TestJob_Split(t *testing.T) { testJSONFromString(`{}`), }, }, + { + name: "9", + j: &Job{ + BaseJob: plugin.NewBaseJob(), + Config: &BaseConfig{ + Split: SplitConfig{ + Key: "f1", + TimeAccuracy: "day", + Range: SplitRange{ + Type: "time", + Left: "2023-05-01", + Right: "2023-05-03", + }, + }, + }, + Querier: &MockQuerier{isTime: true}, + handler: newMockDbHandler(func(name string, conf *config.JSON) (Querier, error) { + return &MockQuerier{isTime: true}, nil + }), + }, + args: args{ + ctx: context.TODO(), + number: 2, + }, + jobConf: testJSONFromString(`{}`), + want: []*config.JSON{ + testJSONFromString(`{"split":{"range":{"type":"time","layout":"2006-01-02","left":"2023-05-01","right":"2023-05-02"}},"where":"f1 >= $1 and f1 < $2"}`), + testJSONFromString(`{"split":{"range":{"type":"time","layout":"2006-01-02","left":"2023-05-02","right":"2023-05-03"}},"where":"f1 >= $1 and f1 <= $2"}`), + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/datax/plugin/reader/rdbm/log.go b/datax/plugin/reader/dbms/log.go similarity index 98% rename from datax/plugin/reader/rdbm/log.go rename to datax/plugin/reader/dbms/log.go index 2f43843..49e805e 100644 --- a/datax/plugin/reader/rdbm/log.go +++ b/datax/plugin/reader/dbms/log.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "os" diff --git a/datax/plugin/reader/rdbm/parameter.go b/datax/plugin/reader/dbms/parameter.go similarity index 98% rename from datax/plugin/reader/rdbm/parameter.go rename to datax/plugin/reader/dbms/parameter.go index 4084ac2..5d623a5 100644 --- a/datax/plugin/reader/rdbm/parameter.go +++ b/datax/plugin/reader/dbms/parameter.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "bytes" @@ -116,10 +116,10 @@ func (q *QueryParam) Agrs(_ []element.Record) (a []interface{}, err error) { for _, v := range q.Table().Fields() { if q.Config.GetSplitConfig().Key == v.Name() { var left, right element.Column - if left, err = q.Config.GetSplitConfig().Range.leftColumn(); err != nil { + if left, err = q.Config.GetSplitConfig().Range.leftColumn(v.Name()); err != nil { return } - if right, err = q.Config.GetSplitConfig().Range.rightColumn(); err != nil { + if right, err = q.Config.GetSplitConfig().Range.rightColumn(v.Name()); err != nil { return } var li, ri interface{} diff --git a/datax/plugin/reader/rdbm/parameter_test.go b/datax/plugin/reader/dbms/parameter_test.go similarity index 99% rename from datax/plugin/reader/rdbm/parameter_test.go rename to datax/plugin/reader/dbms/parameter_test.go index 0a93aa9..89906c5 100644 --- a/datax/plugin/reader/rdbm/parameter_test.go +++ b/datax/plugin/reader/dbms/parameter_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "reflect" diff --git a/datax/plugin/reader/rdbm/querier.go b/datax/plugin/reader/dbms/querier.go similarity index 99% rename from datax/plugin/reader/rdbm/querier.go rename to datax/plugin/reader/dbms/querier.go index b9857d4..c990af6 100644 --- a/datax/plugin/reader/rdbm/querier.go +++ b/datax/plugin/reader/dbms/querier.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/reader/rdbm/querier_help_test.go b/datax/plugin/reader/dbms/querier_help_test.go similarity index 94% rename from datax/plugin/reader/rdbm/querier_help_test.go rename to datax/plugin/reader/dbms/querier_help_test.go index 07d336f..8676ec8 100644 --- a/datax/plugin/reader/rdbm/querier_help_test.go +++ b/datax/plugin/reader/dbms/querier_help_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "context" @@ -107,6 +107,7 @@ type MockQuerier struct { FetchErr error FetchMinErr error FetchMaxErr error + isTime bool } func (m *MockQuerier) Table(bt *database.BaseTable) database.Table { @@ -125,7 +126,11 @@ func (m *MockQuerier) FetchTableWithParam(ctx context.Context, param database.Parameter) (database.Table, error) { if _, ok := param.(*SplitParam); ok { t := NewMockTable(database.NewBaseTable("db", "schema", "name")) - t.AddField(database.NewBaseField(0, "f1", NewMockFieldType(database.GoTypeInt64))) + typ := database.GoTypeInt64 + if m.isTime { + typ = database.GoTypeTime + } + t.AddField(database.NewBaseField(0, "f1", NewMockFieldType(typ))) return t, m.FetchErr } return nil, m.FetchErr @@ -167,10 +172,10 @@ func (m *MockQuerier) Close() error { func testJSON() *config.JSON { return testJSONFromString(`{ - "name" : "rdbmreader", + "name" : "dbmsreader", "developer":"Breeze0806", - "dialect":"rdbm", - "description":"rdbm is base package for relational database" + "dialect":"dbms", + "description":"dbms is base package for relational database" }`) } diff --git a/datax/plugin/reader/rdbm/split.go b/datax/plugin/reader/dbms/split.go similarity index 75% rename from datax/plugin/reader/rdbm/split.go rename to datax/plugin/reader/dbms/split.go index 0335f9c..93cbea3 100644 --- a/datax/plugin/reader/rdbm/split.go +++ b/datax/plugin/reader/dbms/split.go @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( + "context" "fmt" "math/big" "time" @@ -29,6 +30,11 @@ const ( maxDuration time.Duration = 1<<63 - 1 ) +type splitRangeFetcher interface { + fetchMax(ctx context.Context, splitTable database.Table) (element.Column, error) + fetchMin(ctx context.Context, splitTable database.Table) (element.Column, error) +} + //SplitConfig 切分配置 type SplitConfig struct { Key string `json:"key"` //切分键 @@ -37,6 +43,72 @@ type SplitConfig struct { Range SplitRange `json:"range"` //切分范围 } +func (s *SplitConfig) fetchMin(ctx context.Context, + splitTable database.Table) (c element.Column, err error) { + if err = s.build(splitTable); err != nil { + return + } + return s.Range.leftColumn(s.Key) +} + +func (s *SplitConfig) fetchMax(ctx context.Context, + splitTable database.Table) (c element.Column, err error) { + if err = s.build(splitTable); err != nil { + return + } + return s.Range.rightColumn(s.Key) +} + +func (s *SplitConfig) build(splitTable database.Table) (err error) { + if err = s.checkType(splitTable); err != nil { + return err + } + return s.setLayout() +} + +func (s *SplitConfig) setLayout() error { + if s.Range.Type == string(element.TypeTime) { + tl := &timeLayout{} + tl.getLayout(s.TimeAccuracy) + s.Range.Layout = tl.layout + if s.Range.Layout == "" { + return fmt.Errorf( + "timeAccuracy(%v) should not be empty or valid when set range as non-empty", + s.TimeAccuracy) + } + } + return nil +} + +func (s SplitConfig) checkType(splitTable database.Table) (err error) { + if typ, ok := splitTable.Fields()[0].Type().(database.ValuerGoType); ok { + switch typ.GoType() { + case database.GoTypeInt64: + switch element.ColumnType(s.Range.Type) { + case element.TypeBigInt: + default: + return fmt.Errorf("checkType %v is not %v but %v", + s.Key, database.GoTypeInt64, s.Range.Type) + } + case database.GoTypeString: + switch element.ColumnType(s.Range.Type) { + case element.TypeBigInt, element.TypeString: + default: + return fmt.Errorf("checkType %v is 
not %v but %v", + s.Key, database.GoTypeString, s.Range.Type) + } + case database.GoTypeTime: + switch element.ColumnType(s.Range.Type) { + case element.TypeTime: + default: + return fmt.Errorf("checkType %v is not %v but %v", + s.Key, database.GoTypeTime, s.Range.Type) + } + } + } + return nil +} + //SplitRange 切分范围配置 type SplitRange struct { Type string `json:"type"` //类型 bigint, string, time @@ -46,33 +118,33 @@ type SplitRange struct { where string } -func (s SplitRange) leftColumn() (element.Column, error) { - return s.fetchColumn(s.Left) +func (s SplitRange) leftColumn(key string) (element.Column, error) { + return s.fetchColumn(key, s.Left) } -func (s SplitRange) rightColumn() (element.Column, error) { - return s.fetchColumn(s.Right) +func (s SplitRange) rightColumn(key string) (element.Column, error) { + return s.fetchColumn(key, s.Right) } -func (s SplitRange) fetchColumn(value string) (element.Column, error) { +func (s SplitRange) fetchColumn(key string, value string) (element.Column, error) { switch element.ColumnType(s.Type) { case element.TypeBigInt: bi, ok := new(big.Int).SetString(value, 10) if !ok { - return nil, errors.Errorf("value is not %v", element.TypeBigInt) + return nil, errors.Errorf("column(%v) value is not %v", key, element.TypeBigInt) } - return element.NewDefaultColumn(element.NewBigIntColumnValue(bi), "", 0), nil + return element.NewDefaultColumn(element.NewBigIntColumnValue(bi), key, 0), nil case element.TypeString: - return element.NewDefaultColumn(element.NewStringColumnValue(value), "", 0), nil + return element.NewDefaultColumn(element.NewStringColumnValue(value), key, 0), nil case element.TypeTime: t, err := time.Parse(s.Layout, value) if err != nil { - return nil, errors.Wrap(err, "value is not valid time") + return nil, errors.Wrapf(err, "column(%v) value is not valid time", key) } return element.NewDefaultColumn(element.NewTimeColumnValueWithDecoder(t, - element.NewStringTimeDecoder(s.Layout)), "", 0), nil + 
element.NewStringTimeDecoder(s.Layout)), key, 0), nil } - return nil, errors.Errorf("type(%v) does not support", s.Type) + return nil, errors.Errorf("column(%v) type(%v) does not support", key, s.Type) } func split(min, max element.Column, num int, @@ -215,7 +287,6 @@ func bigint2String(res *big.Int, radix int64) string { type timeLayout struct { layout string - min time.Time } func (t *timeLayout) unit() time.Duration { diff --git a/datax/plugin/reader/rdbm/split_test.go b/datax/plugin/reader/dbms/split_test.go similarity index 76% rename from datax/plugin/reader/rdbm/split_test.go rename to datax/plugin/reader/dbms/split_test.go index 9f1c5ad..f4a0477 100644 --- a/datax/plugin/reader/rdbm/split_test.go +++ b/datax/plugin/reader/dbms/split_test.go @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( + "context" + "database/sql" "math/big" "reflect" "testing" @@ -24,6 +26,16 @@ import ( "github.com/Breeze0806/go-etl/storage/database" ) +type MockFieldTypeWithGoType struct { + *database.BaseFieldType +} + +func NewMockFieldTypeWithGoType() *MockFieldType { + return &MockFieldType{ + BaseFieldType: database.NewBaseFieldType(&sql.ColumnType{}), + } +} + func TestSplitRange_fetchColumn(t *testing.T) { type args struct { value string @@ -101,7 +113,7 @@ func TestSplitRange_fetchColumn(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := tt.s.fetchColumn(tt.args.value) + got, err := tt.s.fetchColumn("", tt.args.value) if (err != nil) != tt.wantErr { t.Errorf("SplitRange.fetchColumn() error = %v, wantErr %v", err, tt.wantErr) return @@ -977,7 +989,6 @@ func Test_split(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gotRanges, err := split(tt.args.min, tt.args.max, tt.args.num, tt.args.timeAccuracy, tt.args.splitField) - t.Log(err) if (err != nil) != tt.wantErr { t.Errorf("split() error = %v, 
wantErr %v", err, tt.wantErr) return @@ -988,3 +999,307 @@ func Test_split(t *testing.T) { }) } } + +func TestSplitConfig_fetchMin(t *testing.T) { + type args struct { + ctx context.Context + field database.Field + } + tests := []struct { + name string + s SplitConfig + args args + wantC element.Column + wantErr bool + }{ + { + name: "1", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeBigInt), + Left: "100000", + }, + }, + args: args{ + ctx: context.TODO(), + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + wantC: element.NewDefaultColumn(element.NewBigIntColumnValue(big.NewInt(100000)), + "f1", 0), + }, + { + name: "2", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeString), + Left: "100000", + }, + }, + args: args{ + ctx: context.TODO(), + field: NewMockField(database.NewBaseField(0, "f1", NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + splitTable := NewMockTable(database.NewBaseTable("db", "schema", "table")) + splitTable.AppendField(tt.args.field) + gotC, err := tt.s.fetchMin(tt.args.ctx, splitTable) + if (err != nil) != tt.wantErr { + t.Errorf("SplitConfig.fetchMin() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(gotC, tt.wantC) { + t.Errorf("SplitConfig.fetchMin() = %v, want %v", gotC, tt.wantC) + } + }) + } +} + +func TestSplitConfig_fetchMax(t *testing.T) { + type args struct { + ctx context.Context + field database.Field + } + tests := []struct { + name string + s SplitConfig + args args + wantC element.Column + wantErr bool + }{ + { + name: "1", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeBigInt), + Right: "100000", + }, + }, + args: args{ + ctx: context.TODO(), + field: 
NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + wantC: element.NewDefaultColumn(element.NewBigIntColumnValue(big.NewInt(100000)), + "f1", 0), + }, + { + name: "2", + s: SplitConfig{ + Range: SplitRange{ + Type: string(element.TypeString), + }, + }, + args: args{ + ctx: context.TODO(), + field: NewMockField(database.NewBaseField(0, "f1", NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + splitTable := NewMockTable(database.NewBaseTable("db", "schema", "table")) + splitTable.AppendField(tt.args.field) + gotC, err := tt.s.fetchMax(tt.args.ctx, splitTable) + if (err != nil) != tt.wantErr { + t.Errorf("SplitConfig.fetchMax() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(gotC, tt.wantC) { + t.Errorf("SplitConfig.fetchMax() = %v, want %v", gotC, tt.wantC) + } + }) + } +} + +func TestSplitConfig_checkType(t *testing.T) { + type args struct { + field database.Field + } + tests := []struct { + name string + s SplitConfig + args args + wantErr bool + }{ + { + name: "1", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeBigInt), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + }, + { + name: "2", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeString), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeInt64)), + NewMockFieldType(database.GoTypeInt64)), + }, + wantErr: true, + }, + { + name: "3", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeBigInt), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + 
NewMockFieldType(database.GoTypeString)), + NewMockFieldType(database.GoTypeString)), + }, + }, + { + name: "4", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeString), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeString)), + NewMockFieldType(database.GoTypeString)), + }, + }, + { + name: "5", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeDecimal), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeString)), + NewMockFieldType(database.GoTypeString)), + }, + wantErr: true, + }, + { + name: "6", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeTime), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeTime)), + NewMockFieldType(database.GoTypeTime)), + }, + }, + { + name: "7", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeString), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeTime)), + NewMockFieldType(database.GoTypeTime)), + }, + wantErr: true, + }, + { + name: "8", + s: SplitConfig{ + Key: "f1", + Range: SplitRange{ + Type: string(element.TypeString), + }, + }, + args: args{ + field: NewMockField(database.NewBaseField(0, "f1", + NewMockFieldType(database.GoTypeTime)), + NewMockFieldTypeWithGoType()), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + splitTable := NewMockTable(database.NewBaseTable("db", "schema", "table")) + splitTable.AppendField(tt.args.field) + if err := tt.s.checkType(splitTable); (err != nil) != tt.wantErr { + t.Errorf("SplitConfig.checkType() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestSplitConfig_setLayout(t *testing.T) { + tests := []struct { + name string + s SplitConfig + want string + wantErr bool + }{ + { 
+ name: "1", + s: SplitConfig{ + TimeAccuracy: "day", + Range: SplitRange{ + Type: string(element.TypeTime), + }, + }, + want: "2006-01-02", + wantErr: false, + }, + { + name: "2", + s: SplitConfig{ + TimeAccuracy: "", + Range: SplitRange{ + Type: string(element.TypeTime), + }, + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.s.setLayout(); (err != nil) != tt.wantErr { + t.Errorf("SplitConfig.setLayout() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.s.Range.Layout != tt.want { + t.Errorf("SplitConfig.Range.Layout = %v, want %v", tt.s.Range.Layout, tt.want) + } + }) + } +} diff --git a/datax/plugin/reader/rdbm/task.go b/datax/plugin/reader/dbms/task.go similarity index 99% rename from datax/plugin/reader/rdbm/task.go rename to datax/plugin/reader/dbms/task.go index 896151c..02786eb 100644 --- a/datax/plugin/reader/rdbm/task.go +++ b/datax/plugin/reader/dbms/task.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/reader/rdbm/task_test.go b/datax/plugin/reader/dbms/task_test.go similarity index 99% rename from datax/plugin/reader/rdbm/task_test.go rename to datax/plugin/reader/dbms/task_test.go index 6a3ecfb..3191913 100644 --- a/datax/plugin/reader/rdbm/task_test.go +++ b/datax/plugin/reader/dbms/task_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "context" diff --git a/datax/plugin/reader/mysql/README.md b/datax/plugin/reader/mysql/README.md index 8159ea5..cbcbc78 100644 --- a/datax/plugin/reader/mysql/README.md +++ b/datax/plugin/reader/mysql/README.md @@ -8,7 +8,7 @@ MysqlReader插件实现了从mysql数据库读取数据。在底层实现上,MysqlReader通过github.com/go-sql-driver/mysql连接远程Mysql数据库,并根据用户配置的信息生成查询SQL语句,然后发送到远程Mysql数据库,并将该SQL执行返回结果使用go-etl自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。 -MysqlReader通过使用rdbmreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Mysql采取了storage/database/mysql实现的Dialect。 +MysqlReader通过使用dbmsreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Mysql采取了storage/database/mysql实现的Dialect。 ## 功能说明 @@ -110,6 +110,23 @@ MysqlReader通过使用rdbmreader中定义的查询流程调用go-etl自定义 - 必选:否 - 默认值: 无 +##### range + +###### type +- 描述 主要用于配置mysql表的切分键默认值类型,值为bigInt/string/time,这里会检查表切分键中的类型,请务必确保类型正确。 +- 必选:否 +- 默认值: 无 + +###### left +- 描述 主要用于配置mysql表的切分键默认最小值 +- 必选:否 +- 默认值: 无 + +###### right +- 描述 主要用于配置mysql表的切分键默认最大值 +- 必选:否 +- 默认值: 无 + #### where - 描述 主要用于配置select的where条件 diff --git a/datax/plugin/reader/mysql/job.go b/datax/plugin/reader/mysql/job.go index 8f3a229..b4f2e08 100644 --- a/datax/plugin/reader/mysql/job.go +++ b/datax/plugin/reader/mysql/job.go @@ -15,10 +15,10 @@ package mysql import ( - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/reader/mysql/reader.go b/datax/plugin/reader/mysql/reader.go index f542da8..b2c2cda 100644 --- a/datax/plugin/reader/mysql/reader.go +++ b/datax/plugin/reader/mysql/reader.go @@ -17,7 +17,7 @@ package mysql import ( "github.com/Breeze0806/go-etl/config" spireader "github.com/Breeze0806/go-etl/datax/common/spi/reader" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + 
"github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" //mysql storage @@ -37,7 +37,7 @@ func (r *Reader) ResourcesConfig() *config.JSON { //Job 工作 func (r *Reader) Job() spireader.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } @@ -51,7 +51,7 @@ func (r *Reader) Job() spireader.Job { //Task 任务 func (r *Reader) Task() spireader.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/reader/mysql/task.go b/datax/plugin/reader/mysql/task.go index 47d6f27..37c7f61 100644 --- a/datax/plugin/reader/mysql/task.go +++ b/datax/plugin/reader/mysql/task.go @@ -18,15 +18,15 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartRead 开始读 func (t *Task) StartRead(ctx context.Context, sender plugin.RecordSender) (err error) { - return rdbm.StartRead(ctx, rdbm.NewBaseBatchReader(t.Task, "", nil), sender) + return dbms.StartRead(ctx, dbms.NewBaseBatchReader(t.Task, "", nil), sender) } diff --git a/datax/plugin/reader/oracle/README.md b/datax/plugin/reader/oracle/README.md index f01bb47..b453ea9 100644 --- a/datax/plugin/reader/oracle/README.md +++ b/datax/plugin/reader/oracle/README.md @@ -8,7 +8,7 @@ OracleReader插件实现了从Oracle数据库读取数据。在底层实现上 OracleReader通过github.com/godror/godror使用的Oracle Instant 
Client连接远程oracle数据库,并根据用户配置的信息生成查询SQL语句,然后发送到远程oracle数据库,并将该SQL执行返回结果使用go-etl自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。 -OracleReader通过使用rdbmreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Oracle采取了storage/database/oracle实现的Dialect。 +OracleReader通过使用dbmsreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Oracle采取了storage/database/oracle实现的Dialect。 ## 功能说明 @@ -110,6 +110,23 @@ OracleReader通过使用rdbmreader中定义的查询流程调用go-etl自定义 - 必选:否 - 默认值: 无 +##### range + +###### type +- 描述 主要用于配置oracle表的切分键默认值类型,值为bigInt/string/time,这里会检查表切分键中的类型,请务必确保类型正确。 +- 必选:否 +- 默认值: 无 + +###### left +- 描述 主要用于配置oracle表的切分键默认最小值 +- 必选:否 +- 默认值: 无 + +###### right +- 描述 主要用于配置oracle表的切分键默认最大值 +- 必选:否 +- 默认值: 无 + #### where - 描述 主要用于配置select的where条件 diff --git a/datax/plugin/reader/oracle/job.go b/datax/plugin/reader/oracle/job.go index 0ed9420..f51319c 100644 --- a/datax/plugin/reader/oracle/job.go +++ b/datax/plugin/reader/oracle/job.go @@ -14,9 +14,9 @@ package oracle -import "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/reader/oracle/reader.go b/datax/plugin/reader/oracle/reader.go index ec29684..4f49ed9 100644 --- a/datax/plugin/reader/oracle/reader.go +++ b/datax/plugin/reader/oracle/reader.go @@ -17,7 +17,7 @@ package oracle import ( "github.com/Breeze0806/go-etl/config" spireader "github.com/Breeze0806/go-etl/datax/common/spi/reader" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" ) @@ -34,7 +34,7 @@ func (r *Reader) ResourcesConfig() *config.JSON { //Job 工作 func (r *Reader) Job() spireader.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + 
Job: dbms.NewJob(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } @@ -48,7 +48,7 @@ func (r *Reader) Job() spireader.Job { //Task 任务 func (r *Reader) Task() spireader.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/reader/oracle/task.go b/datax/plugin/reader/oracle/task.go index 27a5a69..0576d1a 100644 --- a/datax/plugin/reader/oracle/task.go +++ b/datax/plugin/reader/oracle/task.go @@ -18,7 +18,7 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" //oracle storage _ "github.com/Breeze0806/go-etl/storage/database/oracle" @@ -26,10 +26,10 @@ import ( //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartRead 开始读 func (t *Task) StartRead(ctx context.Context, sender plugin.RecordSender) (err error) { - return rdbm.StartRead(ctx, rdbm.NewBaseBatchReader(t.Task, "", nil), sender) + return dbms.StartRead(ctx, dbms.NewBaseBatchReader(t.Task, "", nil), sender) } diff --git a/datax/plugin/reader/postgres/README.md b/datax/plugin/reader/postgres/README.md index 10eb02b..a0c6c25 100644 --- a/datax/plugin/reader/postgres/README.md +++ b/datax/plugin/reader/postgres/README.md @@ -8,7 +8,7 @@ PostgresReader插件实现了从Postgres/Greenplum数据库读取数据。在底 PostgresReader通过github.com/lib/pq连接远程Postgres/Greenplum数据库,并根据用户配置的信息生成查询SQL语句,然后发送到远程postgres/greenplum数据库,并将该SQL执行返回结果使用go-etl自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。和直接使用github.com/lib/pq连接数据库不同的是,这里采用了github.com/Breeze0806/go/database/pqto以便能设置读写超时。 
-PostgresReader通过使用rdbmreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中postgres采取了storage/database/postgres实现的Dialect。 +PostgresReader通过使用dbmsreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中postgres采取了storage/database/postgres实现的Dialect。 ## 功能说明 @@ -110,6 +110,23 @@ PostgresReader通过使用rdbmreader中定义的查询流程调用go-etl自定 - 必选:否 - 默认值: 无 +##### range + +###### type +- 描述 主要用于配置postgres表的切分键默认值类型,值为bigInt/string/time,这里会检查表切分键中的类型,请务必确保类型正确。 +- 必选:否 +- 默认值: 无 + +###### left +- 描述 主要用于配置postgres表的切分键默认最小值 +- 必选:否 +- 默认值: 无 + +###### right +- 描述 主要用于配置postgres表的切分键默认最大值 +- 必选:否 +- 默认值: 无 + #### where - 描述 主要用于配置select的where条件 diff --git a/datax/plugin/reader/postgres/job.go b/datax/plugin/reader/postgres/job.go index ba0e850..9933712 100644 --- a/datax/plugin/reader/postgres/job.go +++ b/datax/plugin/reader/postgres/job.go @@ -15,10 +15,10 @@ package postgres import ( - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/reader/postgres/reader.go b/datax/plugin/reader/postgres/reader.go index 599d229..4bce1a2 100644 --- a/datax/plugin/reader/postgres/reader.go +++ b/datax/plugin/reader/postgres/reader.go @@ -17,7 +17,7 @@ package postgres import ( "github.com/Breeze0806/go-etl/config" spireader "github.com/Breeze0806/go-etl/datax/common/spi/reader" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" //postgres storage @@ -37,8 +37,8 @@ func (r *Reader) ResourcesConfig() *config.JSON { //Job 工作 func (r *Reader) Job() spireader.Job { job := &Job{ - Job: rdbm.NewJob( - rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Job: dbms.NewJob( + dbms.NewBaseDbHandler(func(name string, 
conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } @@ -52,7 +52,7 @@ func (r *Reader) Job() spireader.Job { //Task 任务 func (r *Reader) Task() spireader.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/reader/postgres/task.go b/datax/plugin/reader/postgres/task.go index 363845e..b8fbd6c 100644 --- a/datax/plugin/reader/postgres/task.go +++ b/datax/plugin/reader/postgres/task.go @@ -18,15 +18,15 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartRead 开始读 func (t *Task) StartRead(ctx context.Context, sender plugin.RecordSender) (err error) { - return rdbm.StartRead(ctx, rdbm.NewBaseBatchReader(t.Task, "", nil), sender) + return dbms.StartRead(ctx, dbms.NewBaseBatchReader(t.Task, "", nil), sender) } diff --git a/datax/plugin/reader/sqlserver/README.md b/datax/plugin/reader/sqlserver/README.md index 15f9a6d..0bdabb6 100644 --- a/datax/plugin/reader/sqlserver/README.md +++ b/datax/plugin/reader/sqlserver/README.md @@ -8,7 +8,7 @@ SQLServerReader插件实现了从sql server数据库读取数据。在底层实 SQLServerReader通过github.com/denisenkom/go-mssqldb连接远程sql server数据库,并根据用户配置的信息生成查询SQL语句,然后发送到远程sql server数据库,并将该SQL执行返回结果使用go-etl自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。和直接使用github.com/denisenkom/go-mssqldb。 -SQLServerReader通过使用rdbmreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中sqlserver采取了storage/database/sqlserver实现的Dialect。 
+SQLServerReader通过使用dbmsreader中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中sqlserver采取了storage/database/sqlserver实现的Dialect。 ## 功能说明 @@ -117,6 +117,23 @@ SQLServerReader通过使用rdbmreader中定义的查询流程调用go-etl自定 - 必选:否 - 默认值: 无 +##### range + +###### type +- 描述 主要用于配置sqlserver表的切分键默认值类型,值为bigInt/string/time,这里会检查表切分键中的类型,请务必确保类型正确。 +- 必选:否 +- 默认值: 无 + +###### left +- 描述 主要用于配置sqlserver表的切分键默认最小值 +- 必选:否 +- 默认值: 无 + +###### right +- 描述 主要用于配置sqlserver表的切分键默认最大值 +- 必选:否 +- 默认值: 无 + #### where - 描述 主要用于配置select的where条件 diff --git a/datax/plugin/reader/sqlserver/job.go b/datax/plugin/reader/sqlserver/job.go index ae29d7b..7c36da3 100644 --- a/datax/plugin/reader/sqlserver/job.go +++ b/datax/plugin/reader/sqlserver/job.go @@ -14,9 +14,9 @@ package sqlserver -import "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/reader/sqlserver/reader.go b/datax/plugin/reader/sqlserver/reader.go index 04fda4c..b2abc6b 100644 --- a/datax/plugin/reader/sqlserver/reader.go +++ b/datax/plugin/reader/sqlserver/reader.go @@ -17,7 +17,7 @@ package sqlserver import ( "github.com/Breeze0806/go-etl/config" spireader "github.com/Breeze0806/go-etl/datax/common/spi/reader" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" //sqlserver storage @@ -37,7 +37,7 @@ func (r *Reader) ResourcesConfig() *config.JSON { //Job 工作 func (r *Reader) Job() spireader.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } @@ -51,7 +51,7 @@ func (r *Reader) Job() spireader.Job { 
//Task 任务 func (r *Reader) Task() spireader.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler(func(name string, conf *config.JSON) (q rdbm.Querier, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler(func(name string, conf *config.JSON) (q dbms.Querier, err error) { if q, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/reader/sqlserver/task.go b/datax/plugin/reader/sqlserver/task.go index 78a0108..b4685a6 100644 --- a/datax/plugin/reader/sqlserver/task.go +++ b/datax/plugin/reader/sqlserver/task.go @@ -18,15 +18,15 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" ) //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartRead 开始读 func (t *Task) StartRead(ctx context.Context, sender plugin.RecordSender) (err error) { - return rdbm.StartRead(ctx, rdbm.NewBaseBatchReader(t.Task, "", nil), sender) + return dbms.StartRead(ctx, dbms.NewBaseBatchReader(t.Task, "", nil), sender) } diff --git a/datax/plugin/writer/db2/README.md b/datax/plugin/writer/db2/README.md index fa5aeb4..e0722fd 100644 --- a/datax/plugin/writer/db2/README.md +++ b/datax/plugin/writer/db2/README.md @@ -7,7 +7,7 @@ DB2Writer插件实现了向DB2 LUW 数据库写入数据。在底层实现上, ## 实现原理 DB2Writer通过github.com/ibmdb/go_ibm_db利用db2的odbc库连接远程DB2 LUW数据库,并根据用户配置的信息和来自Reader的go-etl自定义的数据类型生成写入SQL语句,然后发送到远程DB2数据库执行。 -DB2Writer通过使用rdbmwriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中DB2采取了storage/database/db2实现的Dialect。 +DB2Writer通过使用dbmswriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中DB2采取了storage/database/db2实现的Dialect。 根据你配置的 `writeMode` 生成 diff --git a/datax/plugin/writer/db2/job.go b/datax/plugin/writer/db2/job.go index 2f8eeb8..4dcadc8 100644 --- a/datax/plugin/writer/db2/job.go +++ 
b/datax/plugin/writer/db2/job.go @@ -14,9 +14,9 @@ package db2 -import "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/writer/db2/task.go b/datax/plugin/writer/db2/task.go index 79d57bc..c048471 100644 --- a/datax/plugin/writer/db2/task.go +++ b/datax/plugin/writer/db2/task.go @@ -18,7 +18,7 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" //db2 dialect @@ -26,22 +26,22 @@ import ( ) var execModeMap = map[string]string{ - database.WriteModeInsert: rdbm.ExecModeNormal, + database.WriteModeInsert: dbms.ExecModeNormal, } func execMode(writeMode string) string { if mode, ok := execModeMap[writeMode]; ok { return mode } - return rdbm.ExecModeNormal + return dbms.ExecModeNormal } //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartWrite 开始写 func (t *Task) StartWrite(ctx context.Context, receiver plugin.RecordReceiver) (err error) { - return rdbm.StartWrite(ctx, rdbm.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) + return dbms.StartWrite(ctx, dbms.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) } diff --git a/datax/plugin/writer/db2/task_test.go b/datax/plugin/writer/db2/task_test.go index 4e30a24..35efd7a 100644 --- a/datax/plugin/writer/db2/task_test.go +++ b/datax/plugin/writer/db2/task_test.go @@ -17,7 +17,7 @@ package db2 import ( "testing" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" _ "github.com/Breeze0806/go-etl/storage/database/db2" ) @@ -36,14 +36,14 @@ func Test_execMode(t *testing.T) { args: args{ writeMode: database.WriteModeInsert, }, - 
want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "2", args: args{ writeMode: "", }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, } for _, tt := range tests { diff --git a/datax/plugin/writer/db2/writer.go b/datax/plugin/writer/db2/writer.go index b377347..f24c41e 100644 --- a/datax/plugin/writer/db2/writer.go +++ b/datax/plugin/writer/db2/writer.go @@ -17,7 +17,7 @@ package db2 import ( "github.com/Breeze0806/go-etl/config" spiwriter "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" ) @@ -34,8 +34,8 @@ func (w *Writer) ResourcesConfig() *config.JSON { //Job 工作 func (w *Writer) Job() spiwriter.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } @@ -49,8 +49,8 @@ func (w *Writer) Job() spiwriter.Job { //Task 任务 func (w *Writer) Task() spiwriter.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/writer/rdbm/batch_writer.go b/datax/plugin/writer/dbms/batch_writer.go similarity index 99% rename from datax/plugin/writer/rdbm/batch_writer.go rename to datax/plugin/writer/dbms/batch_writer.go index e8bb6a5..8ea389a 100644 --- a/datax/plugin/writer/rdbm/batch_writer.go +++ b/datax/plugin/writer/dbms/batch_writer.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/batch_writer_test.go b/datax/plugin/writer/dbms/batch_writer_test.go similarity index 99% rename from datax/plugin/writer/rdbm/batch_writer_test.go rename to datax/plugin/writer/dbms/batch_writer_test.go index a03bb40..4606e4e 100644 --- a/datax/plugin/writer/rdbm/batch_writer_test.go +++ b/datax/plugin/writer/dbms/batch_writer_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/config.go b/datax/plugin/writer/dbms/config.go similarity index 94% rename from datax/plugin/writer/rdbm/config.go rename to datax/plugin/writer/dbms/config.go index 396d445..7050c78 100644 --- a/datax/plugin/writer/rdbm/config.go +++ b/datax/plugin/writer/dbms/config.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "encoding/json" @@ -21,7 +21,7 @@ import ( "time" "github.com/Breeze0806/go-etl/config" - rdbmreader "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + dbmsreader "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/schedule" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go/time2" @@ -38,7 +38,7 @@ type Config interface { GetUsername() string //获取用户名 GetPassword() string //获取密码 GetURL() string //获取连接url - GetColumns() []rdbmreader.Column //获取列信息 + GetColumns() []dbmsreader.Column //获取列信息 GetBaseTable() *database.BaseTable //获取表信息 GetWriteMode() string //获取写入模式 GetBatchSize() int //单次批量写入数 @@ -54,7 +54,7 @@ type BaseConfig struct { Username string `json:"username"` //用户名 Password string `json:"password"` //密码 Column []string `json:"column"` //列信息 - Connection rdbmreader.ConnConfig `json:"connection"` //连接信息 + Connection dbmsreader.ConnConfig `json:"connection"` //连接信息 WriteMode string `json:"writeMode"` //写入模式,如插入insert BatchSize int `json:"batchSize"` //单次批量写入数 BatchTimeout time2.Duration `json:"batchTimeout"` //单次批量写入超时时间 @@ -104,9 +104,9 @@ func (b *BaseConfig) GetURL() string { } //GetColumns 获取列信息 -func (b *BaseConfig) GetColumns() (columns []rdbmreader.Column) { +func (b *BaseConfig) GetColumns() (columns []dbmsreader.Column) { for _, v := range b.Column { - columns = append(columns, &rdbmreader.BaseColumn{ + columns = append(columns, &dbmsreader.BaseColumn{ Name: v, }) } diff --git a/datax/plugin/writer/rdbm/config_test.go b/datax/plugin/writer/dbms/config_test.go similarity index 96% rename from datax/plugin/writer/rdbm/config_test.go rename to datax/plugin/writer/dbms/config_test.go index dade5a1..765afc3 100644 --- a/datax/plugin/writer/rdbm/config_test.go +++ b/datax/plugin/writer/dbms/config_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "errors" @@ -21,7 +21,7 @@ import ( "time" "github.com/Breeze0806/go-etl/config" - rdbmreader "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + dbmsreader "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/schedule" ) @@ -57,24 +57,24 @@ func TestBaseConfig_GetColumns(t *testing.T) { tests := []struct { name string b *BaseConfig - wantColumns []rdbmreader.Column + wantColumns []dbmsreader.Column }{ { name: "1", b: &BaseConfig{ Column: []string{"f1", "f2", "f3", "f4"}, }, - wantColumns: []rdbmreader.Column{ - &rdbmreader.BaseColumn{ + wantColumns: []dbmsreader.Column{ + &dbmsreader.BaseColumn{ Name: "f1", }, - &rdbmreader.BaseColumn{ + &dbmsreader.BaseColumn{ Name: "f2", }, - &rdbmreader.BaseColumn{ + &dbmsreader.BaseColumn{ Name: "f3", }, - &rdbmreader.BaseColumn{ + &dbmsreader.BaseColumn{ Name: "f4", }, }, diff --git a/datax/plugin/writer/rdbm/db_handler.go b/datax/plugin/writer/dbms/db_handler.go similarity index 93% rename from datax/plugin/writer/rdbm/db_handler.go rename to datax/plugin/writer/dbms/db_handler.go index 5436f3d..fb34d1e 100644 --- a/datax/plugin/writer/rdbm/db_handler.go +++ b/datax/plugin/writer/dbms/db_handler.go @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "database/sql" "github.com/Breeze0806/go-etl/config" - rdbmreader "github.com/Breeze0806/go-etl/datax/plugin/reader/rdbm" + dbmsreader "github.com/Breeze0806/go-etl/datax/plugin/reader/dbms" "github.com/Breeze0806/go-etl/storage/database" ) @@ -55,5 +55,5 @@ func (d *BaseDbHandler) Config(conf *config.JSON) (Config, error) { //TableParam 通过关系型数据库配置和执行器获取表参数 func (d *BaseDbHandler) TableParam(config Config, execer Execer) database.Parameter { - return rdbmreader.NewTableParam(config, execer, d.opts) + return dbmsreader.NewTableParam(config, execer, d.opts) } diff --git a/datax/plugin/writer/rdbm/execer.go b/datax/plugin/writer/dbms/execer.go similarity index 99% rename from datax/plugin/writer/rdbm/execer.go rename to datax/plugin/writer/dbms/execer.go index 65590c9..f5f05b1 100644 --- a/datax/plugin/writer/rdbm/execer.go +++ b/datax/plugin/writer/dbms/execer.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/execer_help_test.go b/datax/plugin/writer/dbms/execer_help_test.go similarity index 97% rename from datax/plugin/writer/rdbm/execer_help_test.go rename to datax/plugin/writer/dbms/execer_help_test.go index 843bc50..2289962 100644 --- a/datax/plugin/writer/rdbm/execer_help_test.go +++ b/datax/plugin/writer/dbms/execer_help_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "context" @@ -188,10 +188,10 @@ func (m *MockExecer) Close() error { func testJSON() *config.JSON { return testJSONFromString(`{ - "name" : "rdbmwriter", + "name" : "dbmswriter", "developer":"Breeze0806", - "dialect":"rdbm", - "description":"rdbm is base package for relational database" + "dialect":"dbms", + "description":"dbms is base package for relational database" }`) } diff --git a/datax/plugin/writer/rdbm/job.go b/datax/plugin/writer/dbms/job.go similarity index 99% rename from datax/plugin/writer/rdbm/job.go rename to datax/plugin/writer/dbms/job.go index d4ccc92..6670c2e 100644 --- a/datax/plugin/writer/rdbm/job.go +++ b/datax/plugin/writer/dbms/job.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/job_test.go b/datax/plugin/writer/dbms/job_test.go similarity index 99% rename from datax/plugin/writer/rdbm/job_test.go rename to datax/plugin/writer/dbms/job_test.go index a5025f2..8ca9b0a 100644 --- a/datax/plugin/writer/rdbm/job_test.go +++ b/datax/plugin/writer/dbms/job_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/log.go b/datax/plugin/writer/dbms/log.go similarity index 98% rename from datax/plugin/writer/rdbm/log.go rename to datax/plugin/writer/dbms/log.go index 2f43843..49e805e 100644 --- a/datax/plugin/writer/rdbm/log.go +++ b/datax/plugin/writer/dbms/log.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package rdbm +package dbms import ( "os" diff --git a/datax/plugin/writer/rdbm/task.go b/datax/plugin/writer/dbms/task.go similarity index 99% rename from datax/plugin/writer/rdbm/task.go rename to datax/plugin/writer/dbms/task.go index 7b3d803..03099a1 100644 --- a/datax/plugin/writer/rdbm/task.go +++ b/datax/plugin/writer/dbms/task.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/rdbm/task_test.go b/datax/plugin/writer/dbms/task_test.go similarity index 99% rename from datax/plugin/writer/rdbm/task_test.go rename to datax/plugin/writer/dbms/task_test.go index e75ec68..afe5bdd 100644 --- a/datax/plugin/writer/rdbm/task_test.go +++ b/datax/plugin/writer/dbms/task_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package rdbm +package dbms import ( "context" diff --git a/datax/plugin/writer/mysql/README.md b/datax/plugin/writer/mysql/README.md index 4f0f90c..f0ac1b0 100644 --- a/datax/plugin/writer/mysql/README.md +++ b/datax/plugin/writer/mysql/README.md @@ -8,7 +8,7 @@ MysqlWriter插件实现了向Postgres/Greenplum数据库写入数据。在底层 MysqlWriter通过github.com/go-sql-driver/mysql连接远程Mysql数据库,并根据用户配置的信息和来自Reader的go-etl自定义的数据类型生成写入SQL语句,然后发送到远程Mysql数据库执行。 -MysqlWriter通过使用rdbmwriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Mysql采取了storage/database/mysql实现的Dialect。 +MysqlWriter通过使用dbmswriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Mysql采取了storage/database/mysql实现的Dialect。 根据你配置的 `writeMode` 生成 diff --git a/datax/plugin/writer/mysql/job.go b/datax/plugin/writer/mysql/job.go index 59443b5..844b6ce 100644 --- a/datax/plugin/writer/mysql/job.go +++ b/datax/plugin/writer/mysql/job.go @@ -15,10 +15,10 @@ package mysql import ( - 
"github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" ) //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/writer/mysql/task.go b/datax/plugin/writer/mysql/task.go index 9aeb04a..1452fc1 100644 --- a/datax/plugin/writer/mysql/task.go +++ b/datax/plugin/writer/mysql/task.go @@ -18,7 +18,7 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/mysql" ) @@ -26,24 +26,24 @@ import ( const maxNumPlaceholder = 65535 var execModeMap = map[string]string{ - database.WriteModeInsert: rdbm.ExecModeNormal, - mysql.WriteModeReplace: rdbm.ExecModeNormal, + database.WriteModeInsert: dbms.ExecModeNormal, + mysql.WriteModeReplace: dbms.ExecModeNormal, } func execMode(writeMode string) string { if mode, ok := execModeMap[writeMode]; ok { return mode } - return rdbm.ExecModeNormal + return dbms.ExecModeNormal } //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } type batchWriter struct { - *rdbm.BaseBatchWriter + *dbms.BaseBatchWriter } func (b *batchWriter) BatchSize() (size int) { @@ -56,5 +56,5 @@ func (b *batchWriter) BatchSize() (size int) { //StartWrite 开始写 func (t *Task) StartWrite(ctx context.Context, receiver plugin.RecordReceiver) (err error) { - return rdbm.StartWrite(ctx, &batchWriter{BaseBatchWriter: rdbm.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil)}, receiver) + return dbms.StartWrite(ctx, &batchWriter{BaseBatchWriter: dbms.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil)}, receiver) } diff --git a/datax/plugin/writer/mysql/task_test.go b/datax/plugin/writer/mysql/task_test.go index dd906cb..209f5d1 100644 --- a/datax/plugin/writer/mysql/task_test.go +++ b/datax/plugin/writer/mysql/task_test.go @@ -17,7 +17,7 @@ 
package mysql import ( "testing" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/mysql" ) @@ -56,8 +56,8 @@ func Test_batchWriter_BatchSize(t *testing.T) { { name: "1", b: &batchWriter{ - BaseBatchWriter: rdbm.NewBaseBatchWriter(&rdbm.Task{ - Config: &rdbm.BaseConfig{ + BaseBatchWriter: dbms.NewBaseBatchWriter(&dbms.Task{ + Config: &dbms.BaseConfig{ BatchSize: 1000, }, Table: newMockTable(maxNumPlaceholder / 1000), @@ -68,8 +68,8 @@ func Test_batchWriter_BatchSize(t *testing.T) { { name: "1", b: &batchWriter{ - BaseBatchWriter: rdbm.NewBaseBatchWriter(&rdbm.Task{ - Config: &rdbm.BaseConfig{ + BaseBatchWriter: dbms.NewBaseBatchWriter(&dbms.Task{ + Config: &dbms.BaseConfig{ BatchSize: 10000, }, Table: newMockTable(32), @@ -101,21 +101,21 @@ func Test_execMode(t *testing.T) { args: args{ writeMode: database.WriteModeInsert, }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "2", args: args{ writeMode: mysql.WriteModeReplace, }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "3", args: args{ writeMode: "", }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, } for _, tt := range tests { diff --git a/datax/plugin/writer/mysql/writer.go b/datax/plugin/writer/mysql/writer.go index 9b76c73..72c441b 100644 --- a/datax/plugin/writer/mysql/writer.go +++ b/datax/plugin/writer/mysql/writer.go @@ -17,7 +17,7 @@ package mysql import ( "github.com/Breeze0806/go-etl/config" spiwriter "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" //mysql storage @@ -37,8 +37,8 @@ func (w *Writer) ResourcesConfig() *config.JSON { //Job 工作 func (w *Writer) Job() spiwriter.Job { job := &Job{ - Job: 
rdbm.NewJob(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } @@ -52,8 +52,8 @@ func (w *Writer) Job() spiwriter.Job { //Task 任务 func (w *Writer) Task() spiwriter.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/writer/oracle/README.md b/datax/plugin/writer/oracle/README.md index 7b83f66..4366a17 100644 --- a/datax/plugin/writer/oracle/README.md +++ b/datax/plugin/writer/oracle/README.md @@ -8,7 +8,7 @@ OracleReader插件实现了向Oracle数据库写入数据。在底层实现上 OracleReader通过github.com/godror/godror使用的Oracle Instant Client连接远程oracle数据库,并根据用户配置的信息和来自Reader的go-etl自定义的数据类型生成写入SQL语句,然后发送到远程Oracle数据库执行。 -OracleReader通过使用rdbmwriter中中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Oracle采取了storage/database/oracle实现的Dialect。 +OracleReader通过使用dbmswriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Oracle采取了storage/database/oracle实现的Dialect。 根据你配置的 `writeMode` 生成 diff --git a/datax/plugin/writer/oracle/job.go b/datax/plugin/writer/oracle/job.go index 2280cc2..6ce1b40 100644 --- a/datax/plugin/writer/oracle/job.go +++ b/datax/plugin/writer/oracle/job.go @@ -14,9 +14,9 @@ package oracle -import "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/writer/oracle/task.go b/datax/plugin/writer/oracle/task.go index ae88075..0a068d8 100644
--- a/datax/plugin/writer/oracle/task.go +++ b/datax/plugin/writer/oracle/task.go @@ -18,29 +18,29 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" //oracle dialect "github.com/Breeze0806/go-etl/storage/database/oracle" ) var execModeMap = map[string]string{ - oracle.WriteModeInsert: rdbm.ExecModeNormal, + oracle.WriteModeInsert: dbms.ExecModeNormal, } func execMode(writeMode string) string { if mode, ok := execModeMap[writeMode]; ok { return mode } - return rdbm.ExecModeNormal + return dbms.ExecModeNormal } //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartWrite 开始写 func (t *Task) StartWrite(ctx context.Context, receiver plugin.RecordReceiver) (err error) { - return rdbm.StartWrite(ctx, rdbm.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) + return dbms.StartWrite(ctx, dbms.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) } diff --git a/datax/plugin/writer/oracle/task_test.go b/datax/plugin/writer/oracle/task_test.go index c85e944..2755157 100644 --- a/datax/plugin/writer/oracle/task_test.go +++ b/datax/plugin/writer/oracle/task_test.go @@ -17,7 +17,7 @@ package oracle import ( "testing" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database/oracle" ) @@ -35,14 +35,14 @@ func Test_execMode(t *testing.T) { args: args{ writeMode: oracle.WriteModeInsert, }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "2", args: args{ writeMode: "", }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, } for _, tt := range tests { diff --git a/datax/plugin/writer/oracle/writer.go b/datax/plugin/writer/oracle/writer.go index 0a71bee..c526886 100644 --- a/datax/plugin/writer/oracle/writer.go +++ b/datax/plugin/writer/oracle/writer.go @@ 
-17,7 +17,7 @@ package oracle import ( "github.com/Breeze0806/go-etl/config" spiwriter "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" ) @@ -34,8 +34,8 @@ func (w *Writer) ResourcesConfig() *config.JSON { //Job 工作 func (w *Writer) Job() spiwriter.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } @@ -49,8 +49,8 @@ func (w *Writer) Job() spiwriter.Job { //Task 任务 func (w *Writer) Task() spiwriter.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } diff --git a/datax/plugin/writer/postgres/README.md b/datax/plugin/writer/postgres/README.md index 70f52be..1ab71b9 100644 --- a/datax/plugin/writer/postgres/README.md +++ b/datax/plugin/writer/postgres/README.md @@ -8,7 +8,7 @@ PostgresWriter插件实现了向Postgres/Greenplum数据库写入数据。在底 PostgresWriter通过github.com/lib/pq连接远程Postgres/Greenplum数据库,并根据用户配置的信息和来自Reader的go-etl自定义的数据类型生成写入SQL语句,然后发送到远程Postgres/Greenplum数据库执行。 -Postgres/Greenplum通过使用rdbmwriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Postgres/Greenplum采取了storage/database/postgres实现的Dialect。 +Postgres/Greenplum通过使用dbmswriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中Postgres/Greenplum采取了storage/database/postgres实现的Dialect。 根据你配置的 `writeMode` 生成 diff --git 
a/datax/plugin/writer/postgres/job.go b/datax/plugin/writer/postgres/job.go index 299310e..7ef6db5 100644 --- a/datax/plugin/writer/postgres/job.go +++ b/datax/plugin/writer/postgres/job.go @@ -14,9 +14,9 @@ package postgres -import "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/writer/postgres/task.go b/datax/plugin/writer/postgres/task.go index 9a9fe61..dabd283 100644 --- a/datax/plugin/writer/postgres/task.go +++ b/datax/plugin/writer/postgres/task.go @@ -18,29 +18,29 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/postgres" ) var execModeMap = map[string]string{ - database.WriteModeInsert: rdbm.ExecModeNormal, - postgres.WriteModeCopyIn: rdbm.ExecModeStmtTx, + database.WriteModeInsert: dbms.ExecModeNormal, + postgres.WriteModeCopyIn: dbms.ExecModeStmtTx, } func execMode(writeMode string) string { if mode, ok := execModeMap[writeMode]; ok { return mode } - return rdbm.ExecModeNormal + return dbms.ExecModeNormal } //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartWrite 开始写 func (t *Task) StartWrite(ctx context.Context, receiver plugin.RecordReceiver) (err error) { - return rdbm.StartWrite(ctx, rdbm.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) + return dbms.StartWrite(ctx, dbms.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) } diff --git a/datax/plugin/writer/postgres/task_test.go b/datax/plugin/writer/postgres/task_test.go index c5dd7da..437f209 100644 --- a/datax/plugin/writer/postgres/task_test.go +++ b/datax/plugin/writer/postgres/task_test.go @@ -17,7 +17,7 @@ package postgres import ( "testing" - 
"github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/postgres" ) @@ -36,21 +36,21 @@ func Test_execMode(t *testing.T) { args: args{ writeMode: database.WriteModeInsert, }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "2", args: args{ writeMode: postgres.WriteModeCopyIn, }, - want: rdbm.ExecModeStmtTx, + want: dbms.ExecModeStmtTx, }, { name: "3", args: args{ writeMode: "", }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, } for _, tt := range tests { diff --git a/datax/plugin/writer/postgres/writer.go b/datax/plugin/writer/postgres/writer.go index 72b7450..e4798e1 100644 --- a/datax/plugin/writer/postgres/writer.go +++ b/datax/plugin/writer/postgres/writer.go @@ -17,7 +17,7 @@ package postgres import ( "github.com/Breeze0806/go-etl/config" spiwriter "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" //postgres storage @@ -37,8 +37,8 @@ func (w *Writer) ResourcesConfig() *config.JSON { //Job 工作 func (w *Writer) Job() spiwriter.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } @@ -52,8 +52,8 @@ func (w *Writer) Job() spiwriter.Job { //Task 任务 func (w *Writer) Task() spiwriter.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err 
!= nil { return nil, err } diff --git a/datax/plugin/writer/sqlserver/README.md b/datax/plugin/writer/sqlserver/README.md index e07125b..44f0cd0 100644 --- a/datax/plugin/writer/sqlserver/README.md +++ b/datax/plugin/writer/sqlserver/README.md @@ -8,7 +8,7 @@ SQLServerWriter插件实现了向sql server数据库写入数据。在底层实 SQLServerWriter通过github.com/denisenkom/go-mssqldb连接远程sql server数据库,并根据用户配置的信息和来自Reader的go-etl自定义的数据类型生成写入SQL语句,然后发送到远程sql server数据库执行。 -SQLServerWriter通过使用rdbmwriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中sqlserver采取了storage/database/sqlserver实现的Dialect。 +SQLServerWriter通过使用dbmswriter中定义的查询流程调用go-etl自定义的storage/database的DBWrapper来实现具体的查询。DBWrapper封装了database/sql的众多接口,并且抽象出了数据库方言Dialect。其中sqlserver采取了storage/database/sqlserver实现的Dialect。 根据你配置的 `writeMode` 生成 diff --git a/datax/plugin/writer/sqlserver/job.go b/datax/plugin/writer/sqlserver/job.go index 1303440..d78b1fb 100644 --- a/datax/plugin/writer/sqlserver/job.go +++ b/datax/plugin/writer/sqlserver/job.go @@ -14,9 +14,9 @@ package sqlserver -import "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" +import "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" //Job 工作 type Job struct { - *rdbm.Job + *dbms.Job } diff --git a/datax/plugin/writer/sqlserver/task.go b/datax/plugin/writer/sqlserver/task.go index 502a385..ab3fbb4 100644 --- a/datax/plugin/writer/sqlserver/task.go +++ b/datax/plugin/writer/sqlserver/task.go @@ -18,29 +18,29 @@ import ( "context" "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/sqlserver" ) var execModeMap = map[string]string{ - database.WriteModeInsert: rdbm.ExecModeNormal, - sqlserver.WriteModeCopyIn: rdbm.ExecModeStmt, + database.WriteModeInsert: dbms.ExecModeNormal, + sqlserver.WriteModeCopyIn: 
dbms.ExecModeStmt, } func execMode(writeMode string) string { if mode, ok := execModeMap[writeMode]; ok { return mode } - return rdbm.ExecModeNormal + return dbms.ExecModeNormal } //Task 任务 type Task struct { - *rdbm.Task + *dbms.Task } //StartWrite 开始写 func (t *Task) StartWrite(ctx context.Context, receiver plugin.RecordReceiver) (err error) { - return rdbm.StartWrite(ctx, rdbm.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) + return dbms.StartWrite(ctx, dbms.NewBaseBatchWriter(t.Task, execMode(t.Config.GetWriteMode()), nil), receiver) } diff --git a/datax/plugin/writer/sqlserver/task_test.go b/datax/plugin/writer/sqlserver/task_test.go index e694b47..a13ccd2 100644 --- a/datax/plugin/writer/sqlserver/task_test.go +++ b/datax/plugin/writer/sqlserver/task_test.go @@ -17,7 +17,7 @@ package sqlserver import ( "testing" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" "github.com/Breeze0806/go-etl/storage/database" "github.com/Breeze0806/go-etl/storage/database/sqlserver" ) @@ -36,21 +36,21 @@ func Test_execMode(t *testing.T) { args: args{ writeMode: database.WriteModeInsert, }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, { name: "2", args: args{ writeMode: sqlserver.WriteModeCopyIn, }, - want: rdbm.ExecModeStmt, + want: dbms.ExecModeStmt, }, { name: "3", args: args{ writeMode: "", }, - want: rdbm.ExecModeNormal, + want: dbms.ExecModeNormal, }, } for _, tt := range tests { diff --git a/datax/plugin/writer/sqlserver/writer.go b/datax/plugin/writer/sqlserver/writer.go index 6ca0c2b..c9b5bce 100644 --- a/datax/plugin/writer/sqlserver/writer.go +++ b/datax/plugin/writer/sqlserver/writer.go @@ -17,7 +17,7 @@ package sqlserver import ( "github.com/Breeze0806/go-etl/config" spiwriter "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/plugin/writer/rdbm" + "github.com/Breeze0806/go-etl/datax/plugin/writer/dbms" 
"github.com/Breeze0806/go-etl/storage/database" ) @@ -34,8 +34,8 @@ func (w *Writer) ResourcesConfig() *config.JSON { //Job 工作 func (w *Writer) Job() spiwriter.Job { job := &Job{ - Job: rdbm.NewJob(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Job: dbms.NewJob(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } @@ -49,8 +49,8 @@ func (w *Writer) Job() spiwriter.Job { //Task 任务 func (w *Writer) Task() spiwriter.Task { task := &Task{ - Task: rdbm.NewTask(rdbm.NewBaseDbHandler( - func(name string, conf *config.JSON) (e rdbm.Execer, err error) { + Task: dbms.NewTask(dbms.NewBaseDbHandler( + func(name string, conf *config.JSON) (e dbms.Execer, err error) { if e, err = database.Open(name, conf); err != nil { return nil, err } From 36e0c02fa7fa252515b73434f443b747f055f173 Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Sun, 28 May 2023 09:27:17 +0800 Subject: [PATCH 03/10] =?UTF-8?q?feat&=20fix:=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E6=B5=81=E9=87=8F=E7=9B=91=E6=8E=A7=E5=92=8C=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E9=99=90=E6=B5=81=E7=89=B9=E6=80=A7=E4=B8=AD=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 +- cmd/datax/examples/limit/csv.json | 54 ++++++++++ cmd/datax/examples/limit/main.go | 48 +++++++++ datax/common/config/core/const.go | 2 + datax/core/job/container.go | 25 ++++- datax/core/job/container_test.go | 100 ++++++++++++++++++- datax/core/taskgroup/container.go | 11 ++ datax/core/taskgroup/task_execer.go | 24 ++++- datax/core/transport/channel/channel.go | 6 +- datax/core/transport/channel/channel_test.go | 24 +++-- schedule/task_manager.go | 10 ++ 11 files changed, 285 insertions(+), 22 deletions(-) create mode 100644 cmd/datax/examples/limit/csv.json create mode 100644 cmd/datax/examples/limit/main.go diff --git 
a/.gitignore b/.gitignore index 10c58dc..cfbcfab 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ datax/plugin_test.go datax/plugin.go *.txt *.out -*.exe \ No newline at end of file +*.exe +*.csv \ No newline at end of file diff --git a/cmd/datax/examples/limit/csv.json b/cmd/datax/examples/limit/csv.json new file mode 100644 index 0000000..6b382eb --- /dev/null +++ b/cmd/datax/examples/limit/csv.json @@ -0,0 +1,54 @@ +{ + "core" : { + "container": { + "job":{ + "id": 1, + "sleepInterval":100 + }, + "taskGroup":{ + "id": 1, + "failover":{ + "retryIntervalInMsec":0 + } + } + }, + "transport":{ + "channel":{ + "speed":{ + "byte": 100, + "record":100 + } + } + } + }, + "job":{ + "content":[ + { + "reader":{ + "name": "csvreader", + "parameter": { + "path":["examples/limit/src.csv"], + "encoding":"utf-8", + "delimiter":"," + } + }, + "writer":{ + "name": "csvwriter", + "parameter": { + "path":["examples/limit/dest.csv"], + "encoding":"utf-8", + "delimiter":"," + } + }, + "transformer":[] + } + ], + "setting":{ + "speed":{ + "byte":1024, + "record":100, + "channel":4 + } + } + } +} \ No newline at end of file diff --git a/cmd/datax/examples/limit/main.go b/cmd/datax/examples/limit/main.go new file mode 100644 index 0000000..1b847f7 --- /dev/null +++ b/cmd/datax/examples/limit/main.go @@ -0,0 +1,48 @@ +// Copyright 2020 the go-etl Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "encoding/base64" + "encoding/csv" + "fmt" + "os" + "strconv" + "time" + + "github.com/Breeze0806/go-etl/element" +) + +func main() { + f, err := os.Create("src.csv") + if err != nil { + fmt.Println("crete file fail. err:", err) + return + } + defer f.Close() + + w := csv.NewWriter(f) + for i := 0; i < 1000000; i++ { + record := []string{strconv.Itoa(i), + time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC).AddDate(0, 0, i/1000).Format(element.DefaultTimeFormat[:10]), + base64.StdEncoding.EncodeToString([]byte{byte(i / 100 / 100), byte((i / 100) % 100), byte(i % 100)}), + } + w.Write(record) + if (i+1)%1000 == 0 { + w.Flush() + } + } + w.Flush() +} diff --git a/datax/common/config/core/const.go b/datax/common/config/core/const.go index 8f26eca..29b1a68 100644 --- a/datax/common/config/core/const.go +++ b/datax/common/config/core/const.go @@ -40,6 +40,7 @@ var ( DataxCoreTransportChannelClass = "core.transport.channel.class" DataxCoreTransportChannelCapacity = "core.transport.channel.capacity" DataxCoreTransportChannelCapacityByte = "core.transport.channel.byteCapacity" + DataxCoreTransportChannelSpeed = "core.transport.channel.speed" DataxCoreTransportChannelSpeedByte = "core.transport.channel.speed.byte" DataxCoreTransportChannelSpeedRecord = "core.transport.channel.speed.record" DataxCoreTransportChannelFlowcontrolinterval = "core.transport.channel.flowControlInterval" @@ -59,6 +60,7 @@ var ( DataxJobSettingSpeedByte = "job.setting.speed.byte" DataxJobSettingSpeedRecord = "job.setting.speed.record" DataxJobSettingSpeedChannel = "job.setting.speed.channel" + DataxJobSettingSpeed = "job.setting.speed" DataxJobSettingErrorlimit = "job.setting.errorLimit" DataxJobSettingErrorlimitRecord = "job.setting.errorLimit.record" DataxJobSettingErrorlimitPercent = "job.setting.errorLimit.percentage" diff --git a/datax/core/job/container.go b/datax/core/job/container.go index d9ad13a..228a3b5 100644 --- a/datax/core/job/container.go +++ 
b/datax/core/job/container.go @@ -16,11 +16,13 @@ package job import ( "context" + "fmt" "math" "sort" "strconv" "strings" "sync" + "time" "github.com/Breeze0806/go-etl/config" coreconst "github.com/Breeze0806/go-etl/datax/common/config/core" @@ -282,9 +284,18 @@ func (c *Container) schedule() (err error) { taskGroups = append(taskGroups, taskGroup) go func(taskGroup *taskgroup.Container) { defer c.wg.Done() - select { - case taskGroup.Err = <-errChan: - case <-c.ctx.Done(): + for { + select { + case taskGroup.Err = <-errChan: + return + case <-c.ctx.Done(): + return + case <-time.After(1 * time.Second): + } + stats := taskGroup.Stats() + for _, v := range stats { + fmt.Println(v.String()) + } } }(taskGroup) } @@ -358,6 +369,13 @@ func (c *Container) distributeTaskIntoTaskGroup() (confs []*config.JSON, err err return } + var speed *config.JSON + speed, err = c.Config().GetConfig(coreconst.DataxJobSettingSpeed) + if err != nil { + return + } + + speed.Remove("channel") channelsPerTaskGroup := c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskgroupChannel, 5) channelNumber := c.needChannelNumber if channelNumber > int64(len(tasksConfigs)) { @@ -377,6 +395,7 @@ func (c *Container) distributeTaskIntoTaskGroup() (confs []*config.JSON, err err for i, v := range ss { for j, vj := range v { + tasksConfigs[vj].Set(coreconst.DataxCoreTransportChannelSpeed, speed) confs[i].Set(coreconst.DataxJobContent+"."+strconv.Itoa(j), tasksConfigs[vj]) } } diff --git a/datax/core/job/container_test.go b/datax/core/job/container_test.go index 605d039..f23dc88 100644 --- a/datax/core/job/container_test.go +++ b/datax/core/job/container_test.go @@ -2758,7 +2758,9 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "job":{ "setting":{ "speed":{ - "channel":4 + "channel":4, + "byte":100, + "record":100 } }, "content":[ @@ -2894,7 +2896,9 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "job":{ "setting":{ "speed":{ - "channel":4 + "channel":4, + 
"byte":100, + "record":100 } }, "content":[ @@ -2910,6 +2914,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "A" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } }, { @@ -2924,6 +2938,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "E" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } } ] @@ -2944,7 +2968,9 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "job":{ "setting":{ "speed":{ - "channel":4 + "channel":4, + "byte":100, + "record":100 } }, "content":[ @@ -2960,6 +2986,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "D" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } }, { @@ -2974,6 +3010,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "G" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } } ] @@ -2994,7 +3040,9 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "job":{ "setting":{ "speed":{ - "channel":4 + "channel":4, + "byte":100, + "record":100 } }, "content":[ @@ -3010,6 +3058,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "F" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } }, { @@ -3024,6 +3082,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "C" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } } ] @@ -3044,7 +3112,9 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "job":{ "setting":{ "speed":{ - "channel":4 + "channel":4, + "byte":100, + 
"record":100 } }, "content":[ @@ -3060,6 +3130,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "B" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } }, { @@ -3074,6 +3154,16 @@ func TestContainer_distributeTaskIntoTaskGroup(t *testing.T) { "loadBalanceResourceMark":"a", "id" : "H" } + }, + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":100, + "record":100 + } + } + } } } ] diff --git a/datax/core/taskgroup/container.go b/datax/core/taskgroup/container.go index 0afc605..919f284 100644 --- a/datax/core/taskgroup/container.go +++ b/datax/core/taskgroup/container.go @@ -87,6 +87,17 @@ func (c *Container) Do() error { return c.Start() } +//Stats 获取统计信息 +func (c *Container) Stats() (stats []Stats) { + for _, v := range c.tasks.manager.Runs() { + stat := v.(*taskExecer).Stats() + stat.JobID = c.jobID + stat.TaskGroupID = c.taskGroupID + stats = append(stats, stat) + } + return +} + //Start 开始运行,使用任务调度器执行这些JSON配置 func (c *Container) Start() (err error) { log.Infof("datax job(%v) taskgruop(%v) start", c.jobID, c.taskGroupID) diff --git a/datax/core/taskgroup/task_execer.go b/datax/core/taskgroup/task_execer.go index 85667e5..c075a31 100644 --- a/datax/core/taskgroup/task_execer.go +++ b/datax/core/taskgroup/task_execer.go @@ -16,6 +16,7 @@ package taskgroup import ( "context" + "encoding/json" "fmt" "sync" @@ -63,7 +64,7 @@ func newTaskExecer(ctx context.Context, taskConf *config.JSON, ctx: ctx, attemptCount: atomic.NewInt32(int32(attemptCount)), } - t.channel, err = channel.NewChannel(ctx, nil) + t.channel, err = channel.NewChannel(ctx, taskConf) if err != nil { return nil, err } @@ -249,3 +250,24 @@ Loop: log.Debugf("taskExecer %v shutdown writer", t.key) t.writerRunner.Shutdown() } + +//Stats 统计信息 +type Stats struct { + JobID int64 `json:"jobID"` + TaskGroupID int64 `json:"taskGroupID"` + TaskID int64 `json:"taskID"` + Channel channel.StatsJSON 
`json:"channel"` +} + +func (s *Stats) String() string { + data, _ := json.MarshalIndent(s, "", " ") + return string(data) +} + +//Stats 获取统计信息 +func (t *taskExecer) Stats() Stats { + return Stats{ + TaskID: t.taskID, + Channel: t.channel.StatsJSON(), + } +} diff --git a/datax/core/transport/channel/channel.go b/datax/core/transport/channel/channel.go index 3f8bd0e..1e78d9f 100644 --- a/datax/core/transport/channel/channel.go +++ b/datax/core/transport/channel/channel.go @@ -16,6 +16,7 @@ package channel import ( "context" + "fmt" "sync" "github.com/Breeze0806/go-etl/config" @@ -74,9 +75,10 @@ func NewChannel(ctx context.Context, conf *config.JSON) (*Channel, error) { r := -1 b := -1.0 if conf != nil { - b = conf.GetFloat64OrDefaullt(coreconst.DataxJobSettingSpeedByte, -1.0) - r = int(conf.GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedRecord, -1)) + b = conf.GetFloat64OrDefaullt(coreconst.DataxCoreTransportChannelSpeedByte, -1.0) + r = int(conf.GetInt64OrDefaullt(coreconst.DataxCoreTransportChannelSpeedRecord, -1)) } + fmt.Println(b, r) var limiter *rate.Limiter if b > 0 { limiter = rate.NewLimiter(rate.Limit(b), int(b)) diff --git a/datax/core/transport/channel/channel_test.go b/datax/core/transport/channel/channel_test.go index f936af3..8f47a43 100644 --- a/datax/core/transport/channel/channel_test.go +++ b/datax/core/transport/channel/channel_test.go @@ -77,11 +77,13 @@ func TestChannel_PushAllPopAll(t *testing.T) { func TestChannelWithRateLimit(t *testing.T) { conf, _ := config.NewJSONFromString(`{ - "job":{ - "setting":{ - "speed":{ - "byte":10000, - "record":10 + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":10000, + "record":10 + } } } } @@ -128,11 +130,13 @@ func TestChannelWithRateLimit(t *testing.T) { func TestChannelWithRateLimit_Err(t *testing.T) { conf, _ := config.NewJSONFromString(`{ - "job":{ - "setting":{ - "speed":{ - "byte":10000, - "record":10 + "core":{ + "transport":{ + "channel":{ + "speed":{ + "byte":10000, + "record":10 + } 
} } } diff --git a/schedule/task_manager.go b/schedule/task_manager.go index c5456a9..275021c 100644 --- a/schedule/task_manager.go +++ b/schedule/task_manager.go @@ -47,6 +47,16 @@ func (t *MappedTaskManager) Size() int { return t.lockedSize() } +//Runs 获取当前在跑的任务 +func (t *MappedTaskManager) Runs() (tasks []MappedTask) { + t.Lock() + for _, v := range t.run { + tasks = append(tasks, v) + } + t.Unlock() + return +} + //lockedSize 未加锁的任务数 func (t *MappedTaskManager) lockedSize() int { return t.num From 54dd2c48e620f20b025390ef3cc67693ea5d9ade Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Sun, 28 May 2023 12:07:21 +0800 Subject: [PATCH 04/10] =?UTF-8?q?feat=20&=20test:=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E6=89=93=E5=8D=B0=E7=BB=93=E6=9E=9C=E5=92=8C=E6=B5=8B=E8=AF=95?= =?UTF-8?q?1.19,1.20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/Build.yml | 2 +- .gitignore | 4 ++-- Makefile | 2 +- datax/core/job/container.go | 2 +- datax/core/taskgroup/task_execer.go | 2 +- datax/core/transport/channel/channel.go | 2 -- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/Build.yml b/.github/workflows/Build.yml index b729d68..fc095d3 100644 --- a/.github/workflows/Build.yml +++ b/.github/workflows/Build.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go: [ '1.16', '1.17' , '1.18'] + go: [ '1.16', '1.17' , '1.18', '1.19', '1.20'] name: Go ${{ matrix.go }} sample steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index cfbcfab..244b3a1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ cmd/datax/*.log cmd/datax/run.* cmd/datax/version.go cmd/datax/tools/testData/config +cmd/datax/examples/**/*.csv datax/plugin/plugin.go datax/plugin/reader/**/plugin.go datax/plugin/writer/**/plugin.go @@ -11,5 +12,4 @@ datax/plugin_test.go datax/plugin.go *.txt *.out -*.exe -*.csv \ No newline at end of file +*.exe \ No newline at end of file diff --git 
a/Makefile b/Makefile index 1c9a774..4924c52 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ export LD_LIBRARY_PATH=${DB2HOME}/lib # stable release. GO_VERSION := $(shell go version | cut -d " " -f 3) GO_MINOR_VERSION := $(word 2,$(subst ., ,$(GO_VERSION))) -LINTABLE_MINOR_VERSIONS := 18 +LINTABLE_MINOR_VERSIONS := 20 ifneq ($(filter $(LINTABLE_MINOR_VERSIONS),$(GO_MINOR_VERSION)),) SHOULD_LINT := true endif diff --git a/datax/core/job/container.go b/datax/core/job/container.go index 228a3b5..f6a7402 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -294,7 +294,7 @@ func (c *Container) schedule() (err error) { } stats := taskGroup.Stats() for _, v := range stats { - fmt.Println(v.String()) + fmt.Printf("%s\r", v.String()) } } }(taskGroup) diff --git a/datax/core/taskgroup/task_execer.go b/datax/core/taskgroup/task_execer.go index c075a31..c9f7a9a 100644 --- a/datax/core/taskgroup/task_execer.go +++ b/datax/core/taskgroup/task_execer.go @@ -260,7 +260,7 @@ type Stats struct { } func (s *Stats) String() string { - data, _ := json.MarshalIndent(s, "", " ") + data, _ := json.Marshal(s) return string(data) } diff --git a/datax/core/transport/channel/channel.go b/datax/core/transport/channel/channel.go index 1e78d9f..3f2049c 100644 --- a/datax/core/transport/channel/channel.go +++ b/datax/core/transport/channel/channel.go @@ -16,7 +16,6 @@ package channel import ( "context" - "fmt" "sync" "github.com/Breeze0806/go-etl/config" @@ -78,7 +77,6 @@ func NewChannel(ctx context.Context, conf *config.JSON) (*Channel, error) { b = conf.GetFloat64OrDefaullt(coreconst.DataxCoreTransportChannelSpeedByte, -1.0) r = int(conf.GetInt64OrDefaullt(coreconst.DataxCoreTransportChannelSpeedRecord, -1)) } - fmt.Println(b, r) var limiter *rate.Limiter if b > 0 { limiter = rate.NewLimiter(rate.Limit(b), int(b)) From dabf7c402822ea81478d7d504a637770223eaa11 Mon Sep 17 00:00:00 2001 From: "fhzxfxd@126.com" Date: Sun, 28 May 2023 20:34:24 +0800 Subject: [PATCH 
05/10] =?UTF-8?q?test=20&=20fix:=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E9=80=9A=E9=81=93=E6=95=B0=E6=97=A0=E6=B3=95=E7=94=9F=E6=95=88?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=8C=E6=B5=8B=E8=AF=95split?= =?UTF-8?q?=E7=9A=84=E9=BB=98=E8=AE=A4=E5=80=BC=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 +- README_USER.md | 23 ++++--- cmd/datax/examples/csvpostgres/config.json | 14 ----- cmd/datax/examples/db2/config.json | 14 ----- cmd/datax/examples/limit/csv.json | 14 ----- cmd/datax/examples/mysql/config.json | 14 ----- cmd/datax/examples/oracle/config.json | 14 ----- cmd/datax/examples/postgres/config.json | 14 ----- cmd/datax/examples/postgrescsv/config.json | 14 ----- cmd/datax/examples/postgresxlsx/config.json | 14 ----- cmd/datax/examples/prePostSql/mysql.json | 14 ----- cmd/datax/examples/split/mysql-default.json | 68 +++++++++++++++++++++ cmd/datax/examples/split/mysql.json | 14 ----- cmd/datax/examples/sqlserver/config.json | 14 ----- cmd/datax/examples/xlsxpostgres/config.json | 6 -- datax/core/job/container.go | 22 ++++--- datax/core/job/container_test.go | 24 ++------ 17 files changed, 102 insertions(+), 199 deletions(-) create mode 100644 cmd/datax/examples/split/mysql-default.json diff --git a/README.md b/README.md index d7d4749..4d651da 100644 --- a/README.md +++ b/README.md @@ -50,8 +50,8 @@ go-etl将提供的etl能力如下: #### plan - [x] 实现关系型数据库的任务切分 -- [ ] 实现监控模块 -- [ ] 实现流控模块 +- [x] 实现监控模块 +- [x] 实现流控模块 - [ ] 实现关系型数据库入库断点续传 ### storage diff --git a/README_USER.md b/README_USER.md index 5f50451..7812219 100644 --- a/README_USER.md +++ b/README_USER.md @@ -76,14 +76,6 @@ datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv "retryIntervalInMsec":0 } } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } - } } }, "job":{ @@ -139,8 +131,21 @@ datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv ``` #### 流控配置 
-之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作。 +之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作,例如byte为10485760,现在为10Mb(10*1024*1024)。 +```json +{ + "job":{ + "setting":{ + "speed":{ + "byte":, + "record":10485760, + "channel":4 + } + } + } +} +``` #### 源目的配置向导文件 源目的配置向导文件是csv文件,每行配置可以配置如下: diff --git a/cmd/datax/examples/csvpostgres/config.json b/cmd/datax/examples/csvpostgres/config.json index 4f02526..0f5db49 100644 --- a/cmd/datax/examples/csvpostgres/config.json +++ b/cmd/datax/examples/csvpostgres/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/db2/config.json b/cmd/datax/examples/db2/config.json index db1545b..25b5dff 100644 --- a/cmd/datax/examples/db2/config.json +++ b/cmd/datax/examples/db2/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/limit/csv.json b/cmd/datax/examples/limit/csv.json index 6b382eb..0c58e1f 100644 --- a/cmd/datax/examples/limit/csv.json +++ b/cmd/datax/examples/limit/csv.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/mysql/config.json b/cmd/datax/examples/mysql/config.json index 1c09f4c..f4c5482 100644 --- a/cmd/datax/examples/mysql/config.json +++ b/cmd/datax/examples/mysql/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, 
"sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/oracle/config.json b/cmd/datax/examples/oracle/config.json index 3a4ff80..a6d7461 100644 --- a/cmd/datax/examples/oracle/config.json +++ b/cmd/datax/examples/oracle/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/postgres/config.json b/cmd/datax/examples/postgres/config.json index 7fc0ead..802efd9 100644 --- a/cmd/datax/examples/postgres/config.json +++ b/cmd/datax/examples/postgres/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/postgrescsv/config.json b/cmd/datax/examples/postgrescsv/config.json index ac50220..02ca115 100644 --- a/cmd/datax/examples/postgrescsv/config.json +++ b/cmd/datax/examples/postgrescsv/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/postgresxlsx/config.json b/cmd/datax/examples/postgresxlsx/config.json index 2cc8bdc..7d3d03b 100644 --- a/cmd/datax/examples/postgresxlsx/config.json +++ b/cmd/datax/examples/postgresxlsx/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git 
a/cmd/datax/examples/prePostSql/mysql.json b/cmd/datax/examples/prePostSql/mysql.json index 6a7b9ea..e0322be 100644 --- a/cmd/datax/examples/prePostSql/mysql.json +++ b/cmd/datax/examples/prePostSql/mysql.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/split/mysql-default.json b/cmd/datax/examples/split/mysql-default.json new file mode 100644 index 0000000..8707326 --- /dev/null +++ b/cmd/datax/examples/split/mysql-default.json @@ -0,0 +1,68 @@ +{ + "core" : { + "container": { + "job":{ + "id": 1, + "sleepInterval":100 + } + } + }, + "job":{ + "content":[ + { + "reader":{ + "name": "mysqlreader", + "parameter": { + "username": "root", + "password": "123456", + "split" : { + "key":"id", + "range":{ + "type":"bigInt", + "left":"100000", + "right":"900000" + } + }, + "column": ["*"], + "connection": { + "url": "tcp(192.168.15.130:3306)/source?parseTime=false", + "table": { + "db":"source", + "name":"split" + } + }, + "where": "" + } + }, + "writer":{ + "name": "mysqlwriter", + "parameter": { + "username": "root", + "password": "123456", + "writeMode": "insert", + "column": ["*"], + "session": [], + "preSql": [], + "connection": { + "url": "tcp(192.168.15.130:3306)/mysql?parseTime=false", + "table": { + "db":"destination", + "name":"split" + } + }, + "batchTimeout": "1s", + "batchSize":1000 + } + }, + "transformer":[] + } + ], + "setting":{ + "speed":{ + "byte":0, + "record":1024, + "channel":10 + } + } + } +} \ No newline at end of file diff --git a/cmd/datax/examples/split/mysql.json b/cmd/datax/examples/split/mysql.json index 703a752..3e858af 100644 --- a/cmd/datax/examples/split/mysql.json +++ b/cmd/datax/examples/split/mysql.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - 
} - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/sqlserver/config.json b/cmd/datax/examples/sqlserver/config.json index 6ac1797..e952e87 100644 --- a/cmd/datax/examples/sqlserver/config.json +++ b/cmd/datax/examples/sqlserver/config.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/xlsxpostgres/config.json b/cmd/datax/examples/xlsxpostgres/config.json index 1977549..8bcbbf8 100644 --- a/cmd/datax/examples/xlsxpostgres/config.json +++ b/cmd/datax/examples/xlsxpostgres/config.json @@ -4,12 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } } } }, diff --git a/datax/core/job/container.go b/datax/core/job/container.go index f6a7402..de24780 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -268,6 +268,7 @@ func (c *Container) schedule() (err error) { coreconst.DataxCoreContainerJobMaxWorkerNumber, 4)), len(tasksConfigs)) defer c.taskSchduler.Stop() var taskGroups []*taskgroup.Container + for i := range tasksConfigs { var taskGroup *taskgroup.Container taskGroup, err = taskgroup.NewContainer(c.ctx, tasksConfigs[i]) @@ -294,7 +295,7 @@ func (c *Container) schedule() (err error) { } stats := taskGroup.Stats() for _, v := range stats { - fmt.Printf("%s\r", v.String()) + fmt.Printf("\n%s\r", v.String()) } } }(taskGroup) @@ -408,6 +409,12 @@ func (c *Container) adjustChannelNumber() error { var needChannelNumberByByte int64 = math.MaxInt32 var needChannelNumberByRecord int64 = math.MaxInt32 + if isChannelLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 0) > 0; isChannelLimit { + c.needChannelNumber, _ = c.Config().GetInt64(coreconst.DataxJobSettingSpeedChannel) + 
log.Infof("DataX jobContainer %v set Channel-Number to %v channels.", c.jobID, c.needChannelNumber) + return nil + } + if isByteLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedByte, 0) > 0; isByteLimit { globalLimitedByteSpeed := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedByte, 10*1024*1024) channelLimitedByteSpeed, err := c.Config().GetInt64(coreconst.DataxCoreTransportChannelSpeedByte) @@ -450,13 +457,12 @@ func (c *Container) adjustChannelNumber() error { return nil } - if isChannelLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 0) > 0; isChannelLimit { - //此时 DataxJobSettingSpeedChannel必然存在 - c.needChannelNumber, _ = c.Config().GetInt64(coreconst.DataxJobSettingSpeedChannel) - log.Infof("DataX jobContainer %v set Channel-Number to %v channels.", c.jobID, c.needChannelNumber) - return nil - } - + // if isChannelLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 0) > 0; isChannelLimit { + // //此时 DataxJobSettingSpeedChannel必然存在 + // c.needChannelNumber, _ = c.Config().GetInt64(coreconst.DataxJobSettingSpeedChannel) + // log.Infof("DataX jobContainer %v set Channel-Number to %v channels.", c.jobID, c.needChannelNumber) + // return nil + // } return errors.New("job speed should be setted") } diff --git a/datax/core/job/container_test.go b/datax/core/job/container_test.go index f23dc88..1e49997 100644 --- a/datax/core/job/container_test.go +++ b/datax/core/job/container_test.go @@ -1220,7 +1220,7 @@ func TestContainer_adjustChannelNumber(t *testing.T) { "speed":{ "byte":1, "record":1, - "channel":4 + "channel":0 } } } @@ -1251,7 +1251,7 @@ func TestContainer_adjustChannelNumber(t *testing.T) { "speed":{ "byte":400, "record":3000, - "channel":4 + "channel":0 } } } @@ -1864,25 +1864,9 @@ func TestContainer_split(t *testing.T) { ] } }`)), - wantErr: true, + wantErr: false, wantConfig: testJSONFromString(`{ - "content":[ - { - "reader":{ - "name": "mock", - "parameter" : { - - 
} - }, - "writer":{ - "name": "mock", - "parameter" : { - - } - }, - "transformer" : ["1","2"] - } - ] + "content":[{"reader":{"name":"mock","parameter":{"id":1}},"writer":{"name":"mock","parameter":{"id":4}},"transformer":["1","2"],"taskId":0},{"reader":{"name":"mock","parameter":{"id":2}},"writer":{"name":"mock","parameter":{"id":5}},"transformer":["1","2"],"taskId":1},{"reader":{"name":"mock","parameter":{"id":3}},"writer":{"name":"mock","parameter":{"id":6}},"transformer":["1","2"],"taskId":2}] }`), }, { From 0b431cfd96852c78c0b0f65294511829592fdc88 Mon Sep 17 00:00:00 2001 From: "fhzxfxd@126.com" Date: Sun, 28 May 2023 21:10:49 +0800 Subject: [PATCH 06/10] =?UTF-8?q?fix:=20=E8=B0=83=E6=95=B4=E9=80=9A?= =?UTF-8?q?=E9=81=93=E6=95=B0=E4=B8=BA4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/datax/examples/split/mysql-default.json | 2 +- datax/core/job/container.go | 12 +++++++++--- datax/core/taskgroup/container.go | 10 +++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/cmd/datax/examples/split/mysql-default.json b/cmd/datax/examples/split/mysql-default.json index 8707326..ef111f0 100644 --- a/cmd/datax/examples/split/mysql-default.json +++ b/cmd/datax/examples/split/mysql-default.json @@ -61,7 +61,7 @@ "speed":{ "byte":0, "record":1024, - "channel":10 + "channel":4 } } } diff --git a/datax/core/job/container.go b/datax/core/job/container.go index de24780..a66bb35 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -284,20 +284,26 @@ func (c *Container) schedule() (err error) { } taskGroups = append(taskGroups, taskGroup) go func(taskGroup *taskgroup.Container) { - defer c.wg.Done() + defer func() { + fmt.Printf("\n") + c.wg.Done() + }() + // timer := time.NewTimer(taskGroup.SleepInterval) + // defer timer.Stop() for { select { case taskGroup.Err = <-errChan: return case <-c.ctx.Done(): return - case <-time.After(1 * time.Second): + case 
<-time.After(taskGroup.SleepInterval): } stats := taskGroup.Stats() for _, v := range stats { - fmt.Printf("\n%s\r", v.String()) + fmt.Printf("%s\r", v.String()) } } + }(taskGroup) } End: diff --git a/datax/core/taskgroup/container.go b/datax/core/taskgroup/container.go index 919f284..7c14ca4 100644 --- a/datax/core/taskgroup/container.go +++ b/datax/core/taskgroup/container.go @@ -39,7 +39,7 @@ type Container struct { wg sync.WaitGroup tasks *taskManager ctx context.Context - sleepInterval time.Duration + SleepInterval time.Duration retryInterval time.Duration retryMaxCount int32 } @@ -62,13 +62,13 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err return nil, err } - c.sleepInterval = time.Duration( - c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerJobSleepinterval, 100)) * time.Millisecond + c.SleepInterval = time.Duration( + c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerJobSleepinterval, 1000)) * time.Millisecond c.retryInterval = time.Duration( c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 10000)) * time.Millisecond c.retryMaxCount = int32(c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 1)) log.Infof("datax job(%v) taskgruop(%v) sleepInterval: %v retryInterval: %v retryMaxCount: %v config: %v", - c.jobID, c.taskGroupID, c.sleepInterval, c.retryInterval, c.retryMaxCount, conf) + c.jobID, c.taskGroupID, c.SleepInterval, c.retryInterval, c.retryMaxCount, conf) return } @@ -135,7 +135,7 @@ func (c *Container) Start() (err error) { } } log.Infof("datax job(%v) taskgruop(%v) manage tasks", c.jobID, c.taskGroupID) - ticker := time.NewTicker(c.sleepInterval) + ticker := time.NewTicker(c.SleepInterval) defer ticker.Stop() QueueLoop: //任务队列不为空 From 90ee4144ba1c05f4d036377066a506fcfcfb7f89 Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Sat, 10 Jun 2023 17:28:04 +0800 Subject: [PATCH 07/10] =?UTF-8?q?feat=20&=20docs:=201.=20=E4=BF=AE?= 
=?UTF-8?q?=E6=94=B9datax=E5=90=8C=E6=AD=A5=E5=B7=A5=E5=85=B7=E7=9A=84?= =?UTF-8?q?=E6=97=A5=E5=BF=97=E4=B8=BA=E8=BF=BD=E5=8A=A0=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E4=B8=94=E5=BF=BD=E7=95=A5=E9=99=90=E5=88=B6=E9=80=9F=E5=BA=A6?= =?UTF-8?q?=E3=80=822.=E4=BF=AE=E6=94=B9go-etl=E7=9A=84=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E6=89=8B=E5=86=8C=EF=BC=8C=E4=BF=AE=E6=94=B9go-etl=E7=9A=84?= =?UTF-8?q?=E5=BC=80=E5=8F=91=E6=89=8B=E5=86=8C=E7=9A=84=E5=85=B3=E7=B3=BB?= =?UTF-8?q?=E5=9E=8B=E6=95=B0=E6=8D=AE=E5=BA=93=E4=B8=BA=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +- README_USER.md | 376 ++++++++++++-------- cmd/datax/examples/csvpostgres/config.json | 9 +- cmd/datax/examples/db2/config.json | 9 +- cmd/datax/examples/limit/csv.json | 9 +- cmd/datax/examples/mysql/config.json | 9 +- cmd/datax/examples/oracle/config.json | 9 +- cmd/datax/examples/postgres/config.json | 9 +- cmd/datax/examples/postgrescsv/config.json | 9 +- cmd/datax/examples/postgresxlsx/config.json | 9 +- cmd/datax/examples/prePostSql/mysql.json | 9 +- cmd/datax/examples/split/csv.json | 14 - cmd/datax/examples/sqlserver/config.json | 9 +- cmd/datax/examples/xlsxpostgres/config.json | 9 +- cmd/datax/log.go | 2 +- cmd/datax/main.go | 2 + datax/README.md | 45 ++- datax/core/job/container.go | 14 +- storage/database/README.md | 4 +- tools/datax/build/main.go | 3 + tools/datax/plugin/main.go | 12 + tools/license/main.go | 2 + 22 files changed, 305 insertions(+), 274 deletions(-) diff --git a/README.md b/README.md index 4d651da..01342c6 100644 --- a/README.md +++ b/README.md @@ -38,14 +38,14 @@ go-etl将提供的etl能力如下: 使用[go-etl用户手册](README_USER.md)开始数据同步 -### 开发宝典 +### 数据同步开发宝典 -参考[go-etl开发者文档](datax/README.md)来帮助开发 +参考[go-etl数据同步开发者文档](datax/README.md)来帮助开发 ## 模块简介 ### datax -本包将提供类似于阿里巴巴[DataX](https://github.com/alibaba/DataX)的接口去实现go的etl框架,目前主要实现了job框架内的数据同步能力,监控等功能还未实现. 
+本包将提供类似于阿里巴巴[DataX](https://github.com/alibaba/DataX)的接口去实现go的etl框架,目前主要实现了job框架内的数据同步能力. #### plan diff --git a/README_USER.md b/README_USER.md index 7812219..2d2791a 100644 --- a/README_USER.md +++ b/README_USER.md @@ -1,67 +1,55 @@ # go-etl用户手册 -## 从源码进行编译 +go-etl的datax是一个数据同步工具,目前支持MySQL,postgres,oracle,SQL SERVER,DB2等主流关系型数据库以及csv,xlsx文件之间的数据同步。 -### linux +## 1 从源码进行编译 -#### 依赖 +### 1.1 linux + +#### 1.1.1 依赖 1. golang 1.16以及以上 -#### 构建 +#### 1.1.2 构建 ```bash make dependencies make release ``` -### windows +### 1.2 windows -#### 依赖 +#### 1.2.1 依赖 1. 需要mingw-w64 with gcc 7.2.0以上的环境进行编译 2. golang 1.16以及以上 3. 最小编译环境为win7 -#### 构建 +#### 1.2.2 构建 ```bash release.bat ``` -## 如何开始 +## 2 如何开始 下载对应操作系统的datax,在linux下如Makefile所示export LD_LIBRARY_PATH=${DB2HOME}/lib,否则无法运行 可以使用[ibm db2](https://public.dhe.ibm.com/ibmdl/export/pub/software/data/db2/drivers/odbc_cli/)以及[oracle](https://www.oracle.com/database/technologies/instant-client/downloads.html)下载到对应64位版本odbc依赖,也可以在**QQ群185188648**群共享中中下载到。 -### 查看版本 - -``` -datax version -v0.1.0 (git commit: c82eb302218f38cd3851df4b425256e93f85160d) complied by go version go1.16.5 windows/amd64 -``` - -### 使用方式 +### 2.1 单任务数据同步 +调用datax十分简单,只要直接调用它即可 ```bash -Usage of datax: - -c string - config (default "config.json") - -w string - wizard +data -c config.json ``` +-c 指定数据源配置文件 -### 批量生成配置集和执行脚本 - -```bash -datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv -``` --c 指定数据源配置文件 -w 指定源目的配置向导文件。 +当返回值是0,并且显示run success,表示执行成功 -执行结果会在数据源配置文件目录文件生成源目的配置向导文件行数的配置集,分别以指定数据源配置文件1.json,指定数据源配置文件2.json,...,指定数据源配置文件[n].json的配置集。 +当返回值是1,并且显示run fail,并告知执行失败的原因 -另外,在当前目录会生成执行脚本run.bat或者run.sh。 -#### 数据源配置文件 +#### 2.1.1 数据源配置文件 数据源配置文件是json文件,使用数据源相互组合,如从mysql同步到postgres中 + ```json { "core" : { @@ -69,12 +57,6 @@ datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } } } }, @@ -118,103 +100,26 @@ datax -c 
tools/testData/xlsx.json -w tools/testData/wizard.csv }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } -``` -#### 流控配置 - -之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作,例如byte为10485760,现在为10Mb(10*1024*1024)。 -```json -{ - "job":{ - "setting":{ - "speed":{ - "byte":, - "record":10485760, - "channel":4 - } - } - } -} - ``` -#### 源目的配置向导文件 -源目的配置向导文件是csv文件,每行配置可以配置如下: -```csv -path[table],path[table] -``` -每一列可以是路径或者是表名,注意所有的表要配置库名或者模式名,需要在数据源配置文件配置。 +`reader`和`writer`的配置如下: -#### 测试结果 -可以运行cmd/datax/testData的测试数据 -```bash -cd cmd/datax -datax -c testData/xlsx.json -w testData/wizard.csv -``` -结果会在testData下生成wizard.csv行数的配置文件,分别以xlsx1.json,xlsx2.json,...,xlsx[n].json的配置集。 - -### 数据同步 - -调用datax十分简单,只要直接调用它即可 - -```bash -data -c config.json -``` --c 指定数据源配置文件 +| 类型 | 数据源 | Reader(读) | Writer(写) | 文档 | +| ------------ | ------------------ | ------------ | ---------- | ------------------------------------------------------------ | +| 关系型数据库 | MySQL/Mariadb/Tidb | √ | √ | [读](datax/plugin/reader/mysql/README.md)、[写](datax/plugin/writer/mysql/README.md) | +| | Postgres/Greenplum | √ | √ | [读](datax/plugin/reader/postgres/README.md)、[写](datax/plugin/writer/postgres/README.md) | +| | DB2 LUW | √ | √ | [读](datax/plugin/reader/db2/README.md)、[写](datax/plugin/writer/db2/README.md) | +| | SQL Server | √ | √ | [读](datax/plugin/reader/sqlserver/README.md)、[写](datax/plugin/writer/sqlserver/README.md) | +| | Oracle | √ | √ | [读](datax/plugin/reader/oracle/README.md)、[写](datax/plugin/writer/oracle/README.md) | +| 无结构流 | CSV | √ | √ | [读](datax/plugin/reader/csv/README.md)、[写](datax/plugin/writer/csv/README.md) | +| | XLSX(excel) | √ | √ | [读](datax/plugin/reader/xlsx/README.md)、[写](datax/plugin/writer/xlsx/README.md) | -当返回值是0,并且显示run success,表示执行成功 - -当返回值是1,并且显示run fail,并告知执行失败的原因 +#### 2.1.2 使用示例 -#### 数据库全局配置 - -```json -{ - "job":{ 
- "setting":{ - "pool":{ - "maxOpenConns":8, - "maxIdleConns":8, - "connMaxIdleTime":"40m", - "connMaxLifetime":"40m" - }, - "retry":{ - "type":"ntimes", - "strategy":{ - "n":3, - "wait":"1s" - }, - "ignoreOneByOneError":true - } - } - } -} -``` -##### 连接池pool -+ maxOpenConns: 最大连接打开数 -+ maxIdleConns: 最大空闲连接打开数 -+ connMaxIdleTime: 最大空闲时间 -+ connMaxLifetime: 最大生存时间 - -##### 重试retry -ignoreOneByOneError 是否忽略一个个重试错误 -+ 重试类型type和重试策略 -1. 类型有`ntimes`,指n次数重复重试策略,`"strategy":{"n":3,"wait":"1s"}`,n代表重试次数,wait代表等待时间 -2. 类型有`forever`,指永久重复重试策略,`"strategy":{"wait":"1s"}`,wait代表等待时间 -3. 类型有`exponential`,指幂等重复重试策略,`"strategy":{"init":"100ms","max":"4s"}`,init代表开始时间,max代表最大时间 - -### 使用示例 - -#### 使用mysql同步 +##### 2.1.2.1 使用mysql同步 - 使用cmd/datax/examples/mysql/init.sql初始化数据库**用于测试** - 开启同步mysql命令 @@ -223,7 +128,7 @@ ignoreOneByOneError 是否忽略一个个重试错误 datax -c examples/mysql/config.json ``` -#### 使用postgres同步 +##### 2.1.2.2 使用postgres同步 - 使用cmd/datax/examples/postgres/init.sql初始化数据库**用于测试** - 开启同步postgres命令 @@ -232,7 +137,7 @@ datax -c examples/mysql/config.json datax -c examples/postgres/config.json ``` -#### 使用db2同步 +##### 2.1.2.3 使用db2同步 - 注意使用前请下载相应的db2的odbc库,如linux的make dependencies和release.bat - 注意在linux下如Makefile所示export LD_LIBRARY_PATH=${DB2HOME}/lib @@ -244,7 +149,7 @@ datax -c examples/postgres/config.json datax -c examples/db2/config.json ``` -#### 使用oracle同步 +##### 2.1.2.4 使用oracle同步 - 注意使用前请下载相应的[Oracle Instant Client]( https://www.oracle.com/database/technologies/instant-client/downloads.html),例如,连接oracle 11g最好下载12.x版本。 - 注意在linux下如export LD_LIBRARY_PATH=/opt/oracle/instantclient_21_1:$LD_LIBRARY_PATH,另需要安装libaio @@ -257,7 +162,7 @@ Oracle Instant Client 19不再支持windows7 datax -c examples/oracle/config.json ``` -#### 使用sql server同步 +##### 2.1.2.5 使用sql server同步 - 使用cmd/datax/examples/sqlserver/init.sql初始化数据库**用于测试** - 开启同步sql server命令 @@ -266,7 +171,7 @@ datax -c examples/oracle/config.json datax -c examples/sqlserver/config.json ``` -#### 使用csv同步到postgres +##### 2.1.2.6 
使用csv同步到postgres - 使用cmd/datax/examples/csvpostgres/init.sql初始化数据库**用于测试** - 开启同步命令 @@ -275,7 +180,7 @@ datax -c examples/sqlserver/config.json datax -c examples/csvpostgres/config.json ``` -#### 使用xlsx同步到postgres +##### 2.1.2.7 使用xlsx同步到postgres - 使用cmd/examples/datax/csvpostgres/init.sql初始化数据库**用于测试** - 开启同步命令 @@ -284,7 +189,7 @@ datax -c examples/csvpostgres/config.json datax -c examples/xlsxpostgres/config.json ``` -#### 使用postgres同步csv +##### 2.1.2.8 使用postgres同步csv - 使用cmd/datax/examples/csvpostgres/init.sql初始化数据库**用于测试** - 开启同步命令 @@ -293,7 +198,7 @@ datax -c examples/xlsxpostgres/config.json datax -c examples/postgrescsv/config.json ``` -#### 使用postgres同步xlsx +##### 2.1.2.9 使用postgres同步xlsx - 使用cmd/datax/examples/csvpostgres/init.sql初始化数据库**用于测试** - 开启同步命令 @@ -302,11 +207,52 @@ datax -c examples/postgrescsv/config.json datax -c examples/postgresxlsx/config.json ``` -#### 使用切分键 +##### 2.1.2.10 其他同步例子 + +除了上述例子外,在go-etl特性中所列出的数据源都可以交叉使用,还配置例如mysql到postgresql数据源,mysql到oracle,oracle到db2等等, + +#### 2.1.3 数据库全局配置 + +```json +{ + "job":{ + "setting":{ + "pool":{ + "maxOpenConns":8, + "maxIdleConns":8, + "connMaxIdleTime":"40m", + "connMaxLifetime":"40m" + }, + "retry":{ + "type":"ntimes", + "strategy":{ + "n":3, + "wait":"1s" + }, + "ignoreOneByOneError":true + } + } + } +} +``` +##### 2.1.3.1 连接池pool ++ maxOpenConns: 最大连接打开数 ++ maxIdleConns: 最大空闲连接打开数 ++ connMaxIdleTime: 最大空闲时间 ++ connMaxLifetime: 最大生存时间 + +##### 2.1.3.2 重试retry +ignoreOneByOneError 是否忽略一个个重试错误 ++ 重试类型type和重试策略 +1. 类型有`ntimes`,指n次数重复重试策略,`"strategy":{"n":3,"wait":"1s"}`,n代表重试次数,wait代表等待时间 +2. 类型有`forever`,指永久重复重试策略,`"strategy":{"wait":"1s"}`,wait代表等待时间 +3. 
类型有`exponential`,指幂等重复重试策略,`"strategy":{"init":"100ms","max":"4s"}`,init代表开始时间,max代表最大时间 + +#### 2.1.4 使用切分键 这里假设数据按切分键分布是均匀的,合理使用这样的切分键可以使同步更快,另外为了加快对最大值和最小值的查询,这里对于大表可以预设最大最小值 -##### 测试方式 +##### 2.1.4.1 测试方式 - 使用程序生成mysql数据产生split.csv ```bash cd cmd/datax/examples/split @@ -324,11 +270,11 @@ datax -c examples/split/csv.json datax -c examples/split/mysql.json ``` -#### 使用preSql和postSql +#### 2.1.5 使用preSql和postSql preSql和postSql分别是写入数据前和写入数据后的sql语句组 -##### 测试方式 +##### 2.1.5.1 测试方式 在本例子中,采用了全量导入的方式 1.写入数据前先建立了一个临时表 2.在写入数据后,将原表删除,将临时表重名为新表 @@ -337,6 +283,156 @@ preSql和postSql分别是写入数据前和写入数据后的sql语句组 datax -c examples/prePostSql/mysql.json ``` -#### 其他同步例子 +#### 2.1.6 流控配置 -除了上述例子外,在go-etl特性中所列出的数据源都可以交叉使用,还配置例如mysql到postgresql数据源,mysql到oracle,oracle到db2等等, \ No newline at end of file +之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作, 例如byte为10485760,即10Mb(10x1024x1024)。 + +```json +{ + "job":{ + "setting":{ + "speed":{ + "byte":10485760, + "record":1024, + "channel":4 + } + } + } +} +``` + +### 2.2 多任务数据同步 + +#### 2.2.1 使用方式 + +##### 2.2.1.1 数据源配置文件 + +配置数据源配置文件,如从mysql同步到postgres中 + +```json +{ + "core" : { + "container": { + "job":{ + "id": 1, + "sleepInterval":100 + } + } + }, + "job":{ + "content":[ + { + "reader":{ + "name": "mysqlreader", + "parameter": { + "username": "test:", + "password": "test:", + "column": ["*"], + "connection": { + "url": "tcp(192.168.15.130:3306)/source?parseTime=false", + "table": { + "db":"source", + "name":"type_table" + } + }, + "where": "" + } + }, + "writer":{ + "name": "postgreswriter", + "parameter": { + "username": "postgres", + "password": "123456", + "writeMode": "insert", + "column": ["*"], + "preSql": [], + "connection": { + "url": "postgres://192.168.15.130:5432/postgres?sslmode=disable&connect_timeout=2", + "table": { + "schema":"destination", + "name":"type_table" + } + }, + "batchTimeout": "1s", + "batchSize":1000 + } + }, + 
"transformer":[] + } + ] + } +} +``` + +##### 2.2.1.2 源目的配置向导文件 + +源目的配置向导文件是csv文件,每行配置可以配置如下: + +```csv +path[table],path[table] +``` + +每一列可以是路径或者是表名,注意所有的表要配置库名或者模式名,需要在数据源配置文件配置。 + +##### 2.2.1.3 批量生成数据配置集和执行脚本 + +```bash +datax -c tools/testData/xlsx.json -w tools/testData/wizard.csv +``` +-c 指定数据源配置文件 -w 指定源目的配置向导文件。 + +执行结果会在数据源配置文件目录文件生成源目的配置向导文件行数的配置集,分别以指定数据源配置文件1.json,指定数据源配置文件2.json,...,指定数据源配置文件[n].json的配置集。 + +另外,在当前目录会生成执行脚本run.bat或者run.sh。 + +##### 2.2.1.4 批量生成数据配置集和执行脚本 + +###### windows + +```bash +run.bat +``` + +linux + +```bash +run.sh +``` + +#### 2.2.2 测试结果 +可以运行cmd/datax/testData的测试数据 +```bash +cd cmd/datax +datax -c testData/xlsx.json -w testData/wizard.csv +``` +结果会在testData下生成wizard.csv行数的配置文件,分别以xlsx1.json,xlsx2.json,...,xlsx[n].json的配置集。 + +### 2.3 数据同步帮助手册 + +#### 2.3.1 帮助命令 + +``` +datax -h +``` + +帮助显示 + +```bash +Usage of datax: + -c string #数据源配置文件 + config (default "config.json") + -w string #源目的配置向导文件 + wizard +``` + +#### 2.3.2 查看版本 + +``` +datax version +``` + +显示`版本号`(git commit: `git提交号`) complied by go version `go版本号` + +``` +v0.1.0 (git commit: c82eb302218f38cd3851df4b425256e93f85160d) complied by go version go1.16.5 windows/amd64 +``` \ No newline at end of file diff --git a/cmd/datax/examples/csvpostgres/config.json b/cmd/datax/examples/csvpostgres/config.json index 0f5db49..36902bf 100644 --- a/cmd/datax/examples/csvpostgres/config.json +++ b/cmd/datax/examples/csvpostgres/config.json @@ -47,13 +47,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/db2/config.json b/cmd/datax/examples/db2/config.json index 25b5dff..e6c61eb 100644 --- a/cmd/datax/examples/db2/config.json +++ b/cmd/datax/examples/db2/config.json @@ -47,13 +47,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git 
a/cmd/datax/examples/limit/csv.json b/cmd/datax/examples/limit/csv.json index 0c58e1f..b5000d2 100644 --- a/cmd/datax/examples/limit/csv.json +++ b/cmd/datax/examples/limit/csv.json @@ -28,13 +28,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":1024, - "record":100, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/mysql/config.json b/cmd/datax/examples/mysql/config.json index f4c5482..40576ee 100644 --- a/cmd/datax/examples/mysql/config.json +++ b/cmd/datax/examples/mysql/config.json @@ -48,13 +48,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/oracle/config.json b/cmd/datax/examples/oracle/config.json index a6d7461..f8e62e5 100644 --- a/cmd/datax/examples/oracle/config.json +++ b/cmd/datax/examples/oracle/config.json @@ -47,13 +47,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/postgres/config.json b/cmd/datax/examples/postgres/config.json index 802efd9..e4451ae 100644 --- a/cmd/datax/examples/postgres/config.json +++ b/cmd/datax/examples/postgres/config.json @@ -49,13 +49,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/postgrescsv/config.json b/cmd/datax/examples/postgrescsv/config.json index 02ca115..8266c2d 100644 --- a/cmd/datax/examples/postgrescsv/config.json +++ b/cmd/datax/examples/postgrescsv/config.json @@ -46,13 +46,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/postgresxlsx/config.json b/cmd/datax/examples/postgresxlsx/config.json index 7d3d03b..2936503 100644 --- 
a/cmd/datax/examples/postgresxlsx/config.json +++ b/cmd/datax/examples/postgresxlsx/config.json @@ -49,13 +49,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/prePostSql/mysql.json b/cmd/datax/examples/prePostSql/mysql.json index e0322be..adbe75d 100644 --- a/cmd/datax/examples/prePostSql/mysql.json +++ b/cmd/datax/examples/prePostSql/mysql.json @@ -52,13 +52,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/split/csv.json b/cmd/datax/examples/split/csv.json index 2928a1c..763c399 100644 --- a/cmd/datax/examples/split/csv.json +++ b/cmd/datax/examples/split/csv.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/examples/sqlserver/config.json b/cmd/datax/examples/sqlserver/config.json index e952e87..bf767a4 100644 --- a/cmd/datax/examples/sqlserver/config.json +++ b/cmd/datax/examples/sqlserver/config.json @@ -52,13 +52,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/examples/xlsxpostgres/config.json b/cmd/datax/examples/xlsxpostgres/config.json index 8bcbbf8..d35080b 100644 --- a/cmd/datax/examples/xlsxpostgres/config.json +++ b/cmd/datax/examples/xlsxpostgres/config.json @@ -50,13 +50,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/log.go b/cmd/datax/log.go index 4f0d140..8ab74e3 100644 --- a/cmd/datax/log.go +++ b/cmd/datax/log.go @@ -23,7 +23,7 @@ import ( var log = 
mylog.NewDefaultLogger(os.Stdout, mylog.DebugLevel, "[datax]") func init() { - f, err := os.Create("datax.log") + f, err := os.OpenFile("datax.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { panic(err) } diff --git a/cmd/datax/main.go b/cmd/datax/main.go index 64ec160..1623e8c 100644 --- a/cmd/datax/main.go +++ b/cmd/datax/main.go @@ -36,6 +36,8 @@ func main() { return } + log.Infof("config: %v\n", *configFile) + e := newEnveronment(*configFile) defer e.close() if err := e.build(); err != nil { diff --git a/datax/README.md b/datax/README.md index fce9f93..51d7c9b 100644 --- a/datax/README.md +++ b/datax/README.md @@ -1,4 +1,4 @@ -# go-etl开发者指南 +# go-etl数据同步开发者指南 ## 同步框架简介 @@ -112,14 +112,14 @@ cd tools/go-etl/plugin go run main.go -t reader -p Mysql ``` -这个命令会在go-etl/plugin/reader中自动生成一个如下DB2的reader模板来帮助开发 +这个命令会在datax/plugin/reader中自动生成一个如下mysql的reader模板来帮助开发 ``` - plugin --- reader---mysql--+-----resources--+--plugin.json - |--job.go |--plugin_job_template.json - |--reader.go - |--README.md - |--task.go + reader---mysql--+-----resources--+--plugin.json + |--job.go |--plugin_job_template.json + |--reader.go + |--README.md + |--task.go ``` 如下,不要忘了在plugin.json加入开发者名字和描述 @@ -142,9 +142,9 @@ go run main.go -t reader -p Mysql 查看[数据库存储开发者指南](../storage/database/README.md),不仅能帮助你更快地实现Reader插件接口,而且能帮助你更快地实现Writer插件接口 -##### rdbm reader +##### dbms reader -rdbm reader通过抽象数据库存储的DBWrapper结构体成如下Querier,然后利用Querier完成Job和Task的实现 +dbms reader通过抽象数据库存储的DBWrapper结构体成如下Querier,然后利用Querier完成Job和Task的实现 ```go //Querier 查询器 @@ -166,7 +166,7 @@ type Querier interface { } ``` -像mysql实现Job和Reader,对于Task需要使用rdbm.StartRead函数实现StartRead方法 +像mysql实现Job和Reader,对于Task需要使用dbms.StartRead函数实现StartRead方法 #### 二维表文件流 @@ -239,14 +239,13 @@ cd tools/go-etl/plugin go run main.go -t writer -p Mysql ``` -这个命令会在go-etl/plugin/writer中自动生成如下一个DB2的writer模板来帮助开发 - +这个命令会在datax/plugin/writer中自动生成如下一个mysql的writer模板来帮助开发 ``` - plugin ---- writer--mysql---+-----resources--+--plugin.json - |--job.go 
|--plugin_job_template.json - |--README.md - |--task.go - |--writer.go + writer--mysql---+-----resources--+--plugin.json + |--job.go |--plugin_job_template.json + |--README.md + |--task.go + |--writer.go ``` 如下,不要忘了在plugin.json加入开发者名字和描述 @@ -261,17 +260,17 @@ go run main.go -t writer -p Mysql 另外,这个可以帮助开发者避免在使用插件注册命令后编译时报错。 -#### 关系型数据库 +#### 数据库 -如果你想帮忙实现关系型数据库的数据源,根据以下方式去实现你的数据源将更加方便 +如果你想帮忙实现数据库的数据源,根据以下方式去实现你的数据源将更加方便,当然前提你所使用的驱动库必须实现golang标准库的database/sql的接口。 ##### 数据库存储 查看[数据库存储开发者指南](../storage/database/README.md),不仅能帮助你更快地实现Reader插件接口,而且能帮助你更快地实现Writer插件接口 -##### rdbm writer +##### dbms writer -rdbm writer通过抽象数据库存储的DBWrapper结构体成如下Execer,然后利用Execer完成Job和Task的实现 +dbms writer通过抽象数据库存储的DBWrapper结构体成如下Execer,然后利用Execer完成Job和Task的实现 ```go //Execer 执行器 @@ -299,7 +298,7 @@ type Execer interface { } ``` -像mysql实现Job和Writer,对于Task需要使用rdbm.StartWrite函数实现StartWrite方法 +像mysql实现Job和Writer,对于Task需要使用dbms.StartWrite函数实现StartWrite方法 #### 二维表文件流 @@ -408,7 +407,7 @@ type Execer interface { - 使用正确的数据类型。比如,bool类型的值使用`true`/`false`,而非`"yes"`/`"true"`/`0`等。 - 合理使用集合类型,比如,用数组替代有分隔符的字符串。 -- 类似通用:遵守同一类型的插件的习惯,比如关系型数据库的`connection`参数都是如下结构: +- 类似通用:遵守同一类型的插件的习惯,比如数据库的`connection`参数都是如下结构: ```json { diff --git a/datax/core/job/container.go b/datax/core/job/container.go index a66bb35..67b9219 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -83,31 +83,38 @@ func (c *Container) Start() (err error) { log.Debugf("DataX jobContainer %v starts to preHandle.", c.jobID) if err = c.preHandle(); err != nil { + log.Errorf("DataX jobContainer %v preHandle failed.", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to init.", c.jobID) if err = c.init(); err != nil { + log.Errorf("DataX jobContainer %v init failed.", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to prepare.", c.jobID) if err = c.prepare(); err != nil { + log.Errorf("DataX jobContainer %v prepare failed.", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to 
split.", c.jobID) if err = c.split(); err != nil { + log.Errorf("DataX jobContainer %v split failed.", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to schedule.", c.jobID) if err = c.schedule(); err != nil { + log.Errorf("DataX jobContainer %v schedule failed.", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to post.", c.jobID) if err = c.post(); err != nil { + log.Errorf("DataX jobContainer %v post failed.", c.jobID, err) return } log.Debugf("DataX jobContainer %v starts to postHandle.", c.jobID) if err = c.postHandle(); err != nil { + log.Errorf("DataX jobContainer %v postHandle failed.", c.jobID, err) return } @@ -379,7 +386,8 @@ func (c *Container) distributeTaskIntoTaskGroup() (confs []*config.JSON, err err var speed *config.JSON speed, err = c.Config().GetConfig(coreconst.DataxJobSettingSpeed) if err != nil { - return + speed, _ = config.NewJSONFromString("{}") + err = nil } speed.Remove("channel") @@ -415,8 +423,8 @@ func (c *Container) adjustChannelNumber() error { var needChannelNumberByByte int64 = math.MaxInt32 var needChannelNumberByRecord int64 = math.MaxInt32 - if isChannelLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 0) > 0; isChannelLimit { - c.needChannelNumber, _ = c.Config().GetInt64(coreconst.DataxJobSettingSpeedChannel) + if isChannelLimit := c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 1) > 0; isChannelLimit { + c.needChannelNumber = c.Config().GetInt64OrDefaullt(coreconst.DataxJobSettingSpeedChannel, 1) log.Infof("DataX jobContainer %v set Channel-Number to %v channels.", c.jobID, c.needChannelNumber) return nil } diff --git a/storage/database/README.md b/storage/database/README.md index 1ec4e42..49a00c6 100644 --- a/storage/database/README.md +++ b/storage/database/README.md @@ -1,6 +1,6 @@ # 数据库存储开发者指南 -数据库存储是数据库查询和执行SQL的框架,用于关系型数据库的抽象 +数据库存储是数据库查询和执行SQL的框架,用于数据库的抽象,其底层是借助golang标准库的database/sql的接口来实现的。 ## 数据库存储简介 @@ -45,7 +45,7 @@ type Source interface 
{ 具体实现Source接口时,可以组合BaseSource以简化具体实现Source接口的实现Table方法可以返回具体的表结构接口。可以看mysql包source.go的实现。 -另外,连接信息依赖Config的依赖。目前Config需要用下面的方式定义,否则无法使用rdbm包来实现datax的插件,可以看mysql包config.go的实现。 +另外,连接信息依赖Config的依赖。目前Config需要用下面的方式定义,否则无法使用dbms包来实现datax的插件,可以看mysql包config.go的实现。 ```go type Config struct { diff --git a/tools/datax/build/main.go b/tools/datax/build/main.go index b4a44ee..7b092f4 100644 --- a/tools/datax/build/main.go +++ b/tools/datax/build/main.go @@ -371,6 +371,7 @@ func main() { } } +//生成plugin的reader/writer插件文件 type pluginParser struct { infos []pluginInfo } @@ -475,6 +476,8 @@ func writeVersionCode() (err error) { return } +//通过git获取git版本号 `tag`` (git commit: `git version`) complied by gp version `go version` +//例如 v0.1.2 (git commit: c26eb4e15751e41d32402cbf3c7f1ea8af4e3e47) complied by go version go1.16.14 windows/amd64 func getVersion() (version string, err error) { output := "" if output, err = cmdOutput("git", "describe", "--abbrev=0", "--tags"); err != nil { diff --git a/tools/datax/plugin/main.go b/tools/datax/plugin/main.go index d8108ff..31647c9 100644 --- a/tools/datax/plugin/main.go +++ b/tools/datax/plugin/main.go @@ -181,11 +181,23 @@ func main() { }) switch *typ { + // datax/plugin/reader中自动生成一个如下mysql的reader模板来帮助开发 + // reader---mysql--+-----resources--+--plugin.json + // |--job.go |--plugin_job_template.json + // |--reader.go + // |--README.md + // |--task.go case "reader": files = append(files, file{ filename: filepath.Join(packPath, "reader.go"), content: fmt.Sprintf(readerFile, p), }) + // datax/plugin/writer中自动生成如下一个mysql的writer模板来帮助开发 + // writer--mysql---+-----resources--+--plugin.json + // |--job.go |--plugin_job_template.json + // |--README.md + // |--task.go + // |--writer.go case "writer": files = append(files, file{ filename: filepath.Join(packPath, "writer.go"), diff --git a/tools/license/main.go b/tools/license/main.go index 445aab4..7357ac9 100644 --- a/tools/license/main.go +++ b/tools/license/main.go @@ -100,6 +100,7 @@ func main() 
{ wg.Wait() } +//添加许可证 func readPackages(path string) (packages []string, err error) { var list []os.FileInfo list, err = ioutil.ReadDir(path) @@ -119,6 +120,7 @@ func readPackages(path string) (packages []string, err error) { return } +//检查许可证 func addLicenseHeader(filename string) error { data, err := ioutil.ReadFile(filename) if err != nil { From cf92a4c44c1619418af424a947ef696fa796b741 Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Wed, 14 Jun 2023 20:36:29 +0800 Subject: [PATCH 08/10] =?UTF-8?q?feat=EF=BC=9A1.=20=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E6=89=93=E5=8C=85=E7=A8=8B=E5=BA=8F=E5=9C=A8=E5=8C=85=E4=B8=AD?= =?UTF-8?q?=E5=86=99=E5=85=A5=E7=94=A8=E6=88=B7=E6=89=8B=E5=86=8C=202.?= =?UTF-8?q?=E6=96=B0=E5=A2=9Elog=E6=96=87=E4=BB=B6=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 +- Makefile | 2 +- README_USER.md | 17 +- .../examples/limit/{csv.json => config.json} | 9 +- .../prePostSql/{mysql.json => config.json} | 0 .../split/{mysql-default.json => config.json} | 0 cmd/datax/tools/testData/csv.json | 23 +- cmd/datax/tools/testData/db2.json | 23 +- cmd/datax/tools/testData/mysql.json | 23 +- cmd/datax/tools/testData/oracle.json | 23 +- cmd/datax/tools/testData/postgres.json | 23 +- cmd/datax/tools/testData/sqlserver.json | 14 - cmd/datax/tools/testData/xlsx.json | 23 +- datax/common/plugin/job_collector.go | 6 +- datax/common/plugin/job_test.go | 6 +- datax/core/container.go | 12 - datax/core/container_test.go | 30 --- datax/core/job/container.go | 2 +- .../statistics/communication/comminication.go | 19 -- .../core/statistics/container/communicator.go | 38 --- .../container/plugin/default_job_collector.go | 8 +- datax/core/taskgroup/task_execer.go | 16 +- release.bat | 3 +- tools/datax/release/main.go | 246 ++++++++++++++++++ 24 files changed, 301 insertions(+), 270 deletions(-) rename cmd/datax/examples/limit/{csv.json => config.json} (83%) rename 
cmd/datax/examples/prePostSql/{mysql.json => config.json} (100%) rename cmd/datax/examples/split/{mysql-default.json => config.json} (100%) delete mode 100644 datax/core/statistics/communication/comminication.go delete mode 100644 datax/core/statistics/container/communicator.go create mode 100644 tools/datax/release/main.go diff --git a/.gitignore b/.gitignore index 244b3a1..644fa86 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ datax/plugin_test.go datax/plugin.go *.txt *.out -*.exe \ No newline at end of file +*.exe +release/* +*.zip +*.tar.gz \ No newline at end of file diff --git a/Makefile b/Makefile index 4924c52..2246670 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ cover: .PHONY: release release: @go generate ./... && cd cmd/datax && go build && cd ../.. - + @go run tools/datax/release/main.go .PHONY: doc doc: @godoc -http=:6080 \ No newline at end of file diff --git a/README_USER.md b/README_USER.md index 2d2791a..380c8b4 100644 --- a/README_USER.md +++ b/README_USER.md @@ -119,6 +119,8 @@ data -c config.json #### 2.1.2 使用示例 +注意在linux下如Makefile所示export LD_LIBRARY_PATH=${DB2HOME}/lib + ##### 2.1.2.1 使用mysql同步 - 使用cmd/datax/examples/mysql/init.sql初始化数据库**用于测试** @@ -264,10 +266,10 @@ go run main.go cd ../.. datax -c examples/split/csv.json ``` -- 修改examples/split/mysql.json的split的key为id,dt,str +- 修改examples/split/config.json的split的key为id,dt,str - mysql数据库切分同步整形,日期,字符串类型 ```bash -datax -c examples/split/mysql.json +datax -c examples/split/config.json ``` #### 2.1.5 使用preSql和postSql @@ -280,7 +282,7 @@ preSql和postSql分别是写入数据前和写入数据后的sql语句组 2.在写入数据后,将原表删除,将临时表重名为新表 ```bash -datax -c examples/prePostSql/mysql.json +datax -c examples/prePostSql/config.json ``` #### 2.1.6 流控配置 @@ -300,6 +302,15 @@ datax -c examples/prePostSql/mysql.json } } ``` +##### 2.1.6.1 流控测试 +- 使用程序生成src.csv,发起流控测试 +```bash +cd cmd/datax/examples/limit +go run main.go +cd ../.. 
+datax -c examples/limit/config.json +``` + ### 2.2 多任务数据同步 diff --git a/cmd/datax/examples/limit/csv.json b/cmd/datax/examples/limit/config.json similarity index 83% rename from cmd/datax/examples/limit/csv.json rename to cmd/datax/examples/limit/config.json index b5000d2..3e03009 100644 --- a/cmd/datax/examples/limit/csv.json +++ b/cmd/datax/examples/limit/config.json @@ -28,6 +28,13 @@ }, "transformer":[] } - ] + ], + "setting":{ + "speed":{ + "byte":10485760, + "record":1024, + "channel":4 + } + } } } \ No newline at end of file diff --git a/cmd/datax/examples/prePostSql/mysql.json b/cmd/datax/examples/prePostSql/config.json similarity index 100% rename from cmd/datax/examples/prePostSql/mysql.json rename to cmd/datax/examples/prePostSql/config.json diff --git a/cmd/datax/examples/split/mysql-default.json b/cmd/datax/examples/split/config.json similarity index 100% rename from cmd/datax/examples/split/mysql-default.json rename to cmd/datax/examples/split/config.json diff --git a/cmd/datax/tools/testData/csv.json b/cmd/datax/tools/testData/csv.json index 2137958..c836943 100644 --- a/cmd/datax/tools/testData/csv.json +++ b/cmd/datax/tools/testData/csv.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -58,13 +44,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/tools/testData/db2.json b/cmd/datax/tools/testData/db2.json index 793f6bf..e6c61eb 100644 --- a/cmd/datax/tools/testData/db2.json +++ b/cmd/datax/tools/testData/db2.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -61,13 +47,6 @@ }, 
"transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/tools/testData/mysql.json b/cmd/datax/tools/testData/mysql.json index e5a1d0d..40576ee 100644 --- a/cmd/datax/tools/testData/mysql.json +++ b/cmd/datax/tools/testData/mysql.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -62,13 +48,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/tools/testData/oracle.json b/cmd/datax/tools/testData/oracle.json index a955db1..f8e62e5 100644 --- a/cmd/datax/tools/testData/oracle.json +++ b/cmd/datax/tools/testData/oracle.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -61,13 +47,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/tools/testData/postgres.json b/cmd/datax/tools/testData/postgres.json index a360e1f..19fff56 100644 --- a/cmd/datax/tools/testData/postgres.json +++ b/cmd/datax/tools/testData/postgres.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -62,13 +48,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/cmd/datax/tools/testData/sqlserver.json 
b/cmd/datax/tools/testData/sqlserver.json index 12ecac3..504365d 100644 --- a/cmd/datax/tools/testData/sqlserver.json +++ b/cmd/datax/tools/testData/sqlserver.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, diff --git a/cmd/datax/tools/testData/xlsx.json b/cmd/datax/tools/testData/xlsx.json index b139fbc..439c8a1 100644 --- a/cmd/datax/tools/testData/xlsx.json +++ b/cmd/datax/tools/testData/xlsx.json @@ -4,20 +4,6 @@ "job":{ "id": 1, "sleepInterval":100 - }, - "taskGroup":{ - "id": 1, - "failover":{ - "retryIntervalInMsec":0 - } - } - }, - "transport":{ - "channel":{ - "speed":{ - "byte": 100, - "record":100 - } } } }, @@ -64,13 +50,6 @@ }, "transformer":[] } - ], - "setting":{ - "speed":{ - "byte":3000, - "record":400, - "channel":4 - } - } + ] } } \ No newline at end of file diff --git a/datax/common/plugin/job_collector.go b/datax/common/plugin/job_collector.go index f5523d2..013931a 100644 --- a/datax/common/plugin/job_collector.go +++ b/datax/common/plugin/job_collector.go @@ -14,9 +14,11 @@ package plugin +import "github.com/Breeze0806/go/encoding" + //JobCollector 工作信息采集器,用于统计整个工作的进度,错误信息等 //toto 当前未实现监控模块,为此需要在后面来实现这个接口的结构体 type JobCollector interface { - MessageMap() map[string][]string - MessageByKey(key string) []string + MessageMap() *encoding.JSON + MessageByKey(key string) *encoding.JSON } diff --git a/datax/common/plugin/job_test.go b/datax/common/plugin/job_test.go index 4f32d93..d2dec75 100644 --- a/datax/common/plugin/job_test.go +++ b/datax/common/plugin/job_test.go @@ -17,16 +17,18 @@ package plugin import ( "reflect" "testing" + + "github.com/Breeze0806/go/encoding" ) type mockJobCollector struct { } -func (m *mockJobCollector) MessageMap() map[string][]string { +func (m *mockJobCollector) MessageMap() *encoding.JSON { return nil } -func (m *mockJobCollector) 
MessageByKey(key string) []string { +func (m *mockJobCollector) MessageByKey(key string) *encoding.JSON { return nil } diff --git a/datax/core/container.go b/datax/core/container.go index 6a84c0c..133c1a0 100644 --- a/datax/core/container.go +++ b/datax/core/container.go @@ -16,7 +16,6 @@ package core import ( "github.com/Breeze0806/go-etl/config" - "github.com/Breeze0806/go-etl/datax/core/statistics/communication" ) //Container 容器 @@ -27,7 +26,6 @@ type Container interface { //BaseCotainer 基础容器 type BaseCotainer struct { conf *config.JSON - com *communication.Communication } //NewBaseCotainer 创建基础容器 @@ -44,13 +42,3 @@ func (b *BaseCotainer) SetConfig(conf *config.JSON) { func (b *BaseCotainer) Config() *config.JSON { return b.conf } - -//SetCommunication 未真正使用 -func (b *BaseCotainer) SetCommunication(com *communication.Communication) { - b.com = com -} - -//Communication 未真正使用 -func (b *BaseCotainer) Communication() *communication.Communication { - return b.com -} diff --git a/datax/core/container_test.go b/datax/core/container_test.go index 6c49589..2d0218a 100644 --- a/datax/core/container_test.go +++ b/datax/core/container_test.go @@ -19,7 +19,6 @@ import ( "testing" "github.com/Breeze0806/go-etl/config" - "github.com/Breeze0806/go-etl/datax/core/statistics/communication" ) func testJSONFromString(s string) *config.JSON { @@ -58,32 +57,3 @@ func TestBaseCotainer_SetConfig(t *testing.T) { }) } } - -func TestBaseCotainer_SetCommunication(t *testing.T) { - type args struct { - com *communication.Communication - } - tests := []struct { - name string - b *BaseCotainer - args args - want *communication.Communication - }{ - { - name: "1", - b: NewBaseCotainer(), - args: args{ - com: &communication.Communication{}, - }, - want: &communication.Communication{}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.b.SetCommunication(tt.args.com) - if got := tt.b.Communication(); !reflect.DeepEqual(got, tt.want) { - t.Errorf("Communication() = 
%v, want: %v", got, tt.want) - } - }) - } -} diff --git a/datax/core/job/container.go b/datax/core/job/container.go index 67b9219..716634e 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -178,7 +178,7 @@ func (c *Container) init() (err error) { writerConfig.Set(coreconst.DataxJobSetting, jobSettingConf) - collector := statplugin.NewDefaultJobCollector(c.Communication()) + collector := statplugin.NewDefaultJobCollector() c.jobReader, err = c.initReaderJob(collector, readerConfig, writerConfig) if err != nil { return diff --git a/datax/core/statistics/communication/comminication.go b/datax/core/statistics/communication/comminication.go deleted file mode 100644 index c74c21b..0000000 --- a/datax/core/statistics/communication/comminication.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2020 the go-etl Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package communication - -//Communication todo 暂时未实现 -type Communication struct { -} diff --git a/datax/core/statistics/container/communicator.go b/datax/core/statistics/container/communicator.go deleted file mode 100644 index 1a392dc..0000000 --- a/datax/core/statistics/container/communicator.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 the go-etl Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package container - -import ( - "github.com/Breeze0806/go-etl/config" - "github.com/Breeze0806/go-etl/datax/core/statistics/communication" -) - -//State 状态 -type State int - -//Communicator 交换器 todo 未使用 -type Communicator interface { - RegisterCommunication(configs []*config.JSON) - - Collect() Communicator - - Report(communication communication.Communication) - - CollectState() State - - GetCommunication(id int64) communication.Communication - - GetCommunicationMap() map[int64]communication.Communication -} diff --git a/datax/core/statistics/container/plugin/default_job_collector.go b/datax/core/statistics/container/plugin/default_job_collector.go index a02cfbe..d6afa10 100644 --- a/datax/core/statistics/container/plugin/default_job_collector.go +++ b/datax/core/statistics/container/plugin/default_job_collector.go @@ -16,23 +16,23 @@ package plugin import ( "github.com/Breeze0806/go-etl/datax/common/plugin" - "github.com/Breeze0806/go-etl/datax/core/statistics/communication" + "github.com/Breeze0806/go/encoding" ) //DefaultJobCollector 默认工作收集器 type DefaultJobCollector struct{} //NewDefaultJobCollector 创建默认工作收集器 -func NewDefaultJobCollector(*communication.Communication) plugin.JobCollector { +func NewDefaultJobCollector() plugin.JobCollector { return &DefaultJobCollector{} } //MessageMap 空方法 -func (d *DefaultJobCollector) MessageMap() map[string][]string { +func (d *DefaultJobCollector) MessageMap() *encoding.JSON { return nil } //MessageByKey 空方法 -func (d *DefaultJobCollector) MessageByKey(key string) []string { +func (d *DefaultJobCollector) 
MessageByKey(key string) *encoding.JSON { return nil } diff --git a/datax/core/taskgroup/task_execer.go b/datax/core/taskgroup/task_execer.go index c9f7a9a..9bbfd2b 100644 --- a/datax/core/taskgroup/task_execer.go +++ b/datax/core/taskgroup/task_execer.go @@ -24,7 +24,6 @@ import ( coreconst "github.com/Breeze0806/go-etl/datax/common/config/core" "github.com/Breeze0806/go-etl/datax/common/plugin/loader" "github.com/Breeze0806/go-etl/datax/common/spi/writer" - "github.com/Breeze0806/go-etl/datax/core/statistics/communication" "github.com/Breeze0806/go-etl/datax/core/taskgroup/runner" "github.com/Breeze0806/go-etl/datax/core/transport/channel" "github.com/Breeze0806/go-etl/datax/core/transport/exchange" @@ -43,14 +42,13 @@ type taskExecer struct { readerRunner runner.Runner //执行运行器 wg sync.WaitGroup errors chan error - //todo: taskCommunication没用 - taskCommunication communication.Communication - destroy sync.Once - key string - exchanger *exchange.RecordExchanger - cancalMutex sync.Mutex //由于取消函数会被多线程调用,需要加锁 - cancel context.CancelFunc //取消函数 - attemptCount *atomic.Int32 //执行次数 + + destroy sync.Once + key string + exchanger *exchange.RecordExchanger + cancalMutex sync.Mutex //由于取消函数会被多线程调用,需要加锁 + cancel context.CancelFunc //取消函数 + attemptCount *atomic.Int32 //执行次数 } //newTaskExecer 根据上下文ctx,任务配置taskConf,前缀关键字prefixKey diff --git a/release.bat b/release.bat index 4173968..2f63c55 100644 --- a/release.bat +++ b/release.bat @@ -8,4 +8,5 @@ go mod vendor go generate ./... cd cmd\datax go build -cd ../.. \ No newline at end of file +cd ../.. +go run tools/datax/release/main.go \ No newline at end of file diff --git a/tools/datax/release/main.go b/tools/datax/release/main.go new file mode 100644 index 0000000..df087a6 --- /dev/null +++ b/tools/datax/release/main.go @@ -0,0 +1,246 @@ +// Copyright 2020 the go-etl Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "archive/tar" + "archive/zip" + "bytes" + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" +) + +var ( + sourceUserPath = "datax/" + destUserPath = "release/datax/" + + sourceExamplePath = "cmd/datax/examples" + destExamplePath = "release/examples" +) + +func main() { + err := copyMarkdown("plugin/reader") + if err != nil { + fmt.Println("copyMarkdown reader fail. err:", err) + os.Exit(1) + } + + err = copyMarkdown("plugin/writer") + if err != nil { + fmt.Println("copyMarkdown writer fail. err:", err) + os.Exit(1) + } + + err = copyConfig() + if err != nil { + fmt.Println("copyConfig fail. err:", err) + os.Exit(1) + } + + data, err := ioutil.ReadFile("README_USER.md") + if err != nil { + fmt.Println("ReadFile README_USER.md fail. err:", err) + os.Exit(1) + } + + err = ioutil.WriteFile("release/README_USER.md", data, 0644) + if err != nil { + fmt.Println("WriteFile release/README_USER.md fail. err:", err) + os.Exit(1) + } + + output := "" + if output, err = cmdOutput("git", "describe", "--abbrev=0", "--tags"); err != nil { + fmt.Printf("use git to tag version fail. error: %v\n", err) + os.Exit(1) + } + tagVersion := strings.ReplaceAll(output, "\r", "") + tagVersion = strings.ReplaceAll(tagVersion, "\n", "") + + if runtime.GOOS == "windows" { + os.Rename("cmd/datax/datax.exe", "release/datax.exe") + if err = zipDir("release", "datax-"+tagVersion+"-windows-x86_64.zip"); err != nil { + fmt.Printf("uzipDir fail. 
error: %v\n", err) + os.Exit(1) + } + } else if runtime.GOOS == "linux" { + os.Rename("cmd/datax/datax", "release/datax") + if err = tarDir("release", "datax-"+tagVersion+"-linux-x86_64.tar.gz"); err != nil { + fmt.Printf("tarDir fail. error: %v\n", err) + os.Exit(1) + } + } else { + fmt.Printf("OS: %v\n", runtime.GOOS) + os.Exit(1) + } +} + +func copyMarkdown(path string) (err error) { + var list []os.FileInfo + list, err = ioutil.ReadDir(filepath.Join(sourceUserPath, path)) + if err != nil { + return err + } + var data []byte + for _, v := range list { + if v.IsDir() { + data, err = ioutil.ReadFile(filepath.Join(sourceUserPath, path, v.Name(), "README.md")) + if err != nil { + err = nil + continue + } + os.MkdirAll(filepath.Join(destUserPath, path, v.Name()), 0644) + err = ioutil.WriteFile(filepath.Join(destUserPath, path, v.Name(), "README.md"), data, 0644) + if err != nil { + return + } + } + } + return +} + +func copyConfig() (err error) { + os.MkdirAll(destExamplePath, 0644) + var list []os.FileInfo + list, err = ioutil.ReadDir(sourceExamplePath) + if err != nil { + return err + } + var data []byte + for _, v := range list { + if v.IsDir() { + data, err = ioutil.ReadFile(filepath.Join(sourceExamplePath, v.Name(), "config.json")) + if err != nil { + err = nil + continue + } + os.MkdirAll(filepath.Join(destExamplePath, v.Name()), 0644) + err = ioutil.WriteFile(filepath.Join(destExamplePath, v.Name(), "config.json"), data, 0644) + if err != nil { + return + } + } + } + return +} + +func zipDir(src, dest string) error { + zipfile, err := os.Create(dest) + if err != nil { + return err + } + defer zipfile.Close() + + archive := zip.NewWriter(zipfile) + defer archive.Close() + + filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + header, err := zip.FileInfoHeader(info) + if err != nil { + return err + } + + header.Name = path + if info.IsDir() { + header.Name += "/" + } else { + header.Method = zip.Deflate 
+ } + + writer, err := archive.CreateHeader(header) + if err != nil { + return err + } + + if !info.IsDir() { + file, err := os.Open(path) + if err != nil { + return err + } + defer file.Close() + _, err = io.Copy(writer, file) + } + return err + }) + + return err +} + +func tarDir(src, dst string) error { + fw, err := os.Create(dst) + if err != nil { + return err + } + defer fw.Close() + + gw := gzip.NewWriter(fw) + defer gw.Close() + + tw := tar.NewWriter(gw) + defer tw.Close() + return filepath.Walk(src, func(fileName string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + hdr, err := tar.FileInfoHeader(fi, "") + if err != nil { + return err + } + hdr.Name = strings.TrimPrefix(fileName, string(filepath.Separator)) + + if err := tw.WriteHeader(hdr); err != nil { + return err + } + + if !fi.Mode().IsRegular() { + return nil + } + + fr, err := os.Open(fileName) + defer fr.Close() + if err != nil { + return err + } + + _, err = io.Copy(tw, fr) + if err != nil { + return err + } + return nil + }) +} + +func cmdOutput(cmd string, arg ...string) (output string, err error) { + c := exec.Command(cmd, arg...) 
+ var stdout, stderr bytes.Buffer + c.Stdout = &stdout + c.Stderr = &stderr + if err = c.Run(); err != nil { + err = fmt.Errorf("%v(%s)", err, stderr.String()) + return + } + return stdout.String(), nil +} From c48065ba2463d47bc7b86a8257ea15499b369e5f Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Fri, 16 Jun 2023 23:40:01 +0800 Subject: [PATCH 09/10] =?UTF-8?q?feat:=201.=E6=96=B0=E5=A2=9E=E5=A4=8D?= =?UTF-8?q?=E6=9D=82=E7=9B=91=E6=8E=A7=E7=89=B9=E6=80=A7=202.=E9=80=9A?= =?UTF-8?q?=E8=BF=87http=E7=AB=AF=E5=8F=A3=E8=8E=B7=E5=8F=96=E7=9B=91?= =?UTF-8?q?=E6=8E=A7=E4=BF=A1=E6=81=AF=203.=E6=96=B0=E5=A2=9Ehttp=E7=9A=84?= =?UTF-8?q?pprof=E4=BB=A5=E6=96=B9=E4=BE=BF=E8=B0=83=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +- README_USER.md | 19 +---- cmd/datax/enveronment.go | 75 ++++++++++++++++++- cmd/datax/examples/limit/config.json | 4 +- cmd/datax/examples/limit/csv.json | 40 ---------- cmd/datax/examples/split/mysql.json | 63 ---------------- cmd/datax/handler.go | 25 +++++++ cmd/datax/main.go | 3 +- datax/common/plugin/job_collector.go | 4 +- datax/common/plugin/job_test.go | 4 +- datax/core/container.go | 15 +++- datax/core/job/container.go | 31 ++++---- datax/core/statistics/container/metrics.go | 47 ++++++++++++ .../container/plugin/default_job_collector.go | 21 +++--- datax/core/taskgroup/container.go | 73 ++++++++++-------- datax/core/taskgroup/task_execer.go | 6 +- datax/plugin/reader/resources/plugin.json | 6 -- datax/plugin/writer/resources/plugin.json | 6 -- go.mod | 1 + go.sum | 4 + 20 files changed, 247 insertions(+), 205 deletions(-) delete mode 100644 cmd/datax/examples/limit/csv.json delete mode 100644 cmd/datax/examples/split/mysql.json create mode 100644 cmd/datax/handler.go create mode 100644 datax/core/statistics/container/metrics.go delete mode 100644 datax/plugin/reader/resources/plugin.json delete mode 100644 datax/plugin/writer/resources/plugin.json diff --git a/README.md 
b/README.md index 01342c6..83afe07 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ go-etl将提供的etl能力如下: | 无结构流 | CSV | √ | √ | [读](datax/plugin/reader/csv/README.md)、[写](datax/plugin/writer/csv/README.md) | | | XLSX(excel) | √ | √ | [读](datax/plugin/reader/xlsx/README.md)、[写](datax/plugin/writer/xlsx/README.md) | -### 用户手册 +### 数据同步用户手册 -使用[go-etl用户手册](README_USER.md)开始数据同步 +使用[go-etl数据同步用户手册](README_USER.md)开始数据同步 ### 数据同步开发宝典 @@ -52,7 +52,6 @@ go-etl将提供的etl能力如下: - [x] 实现关系型数据库的任务切分 - [x] 实现监控模块 - [x] 实现流控模块 -- [ ] 实现关系型数据库入库断点续传 ### storage diff --git a/README_USER.md b/README_USER.md index a005084..ba9ee55 100644 --- a/README_USER.md +++ b/README_USER.md @@ -1,4 +1,4 @@ -# go-etl用户手册 +# go-etl数据同步用户手册 go-etl的datax是一个数据同步工具,目前支持MySQL,postgres,oracle,SQL SERVER,DB2等主流关系型数据库以及csv,xlsx文件之间的数据同步。 @@ -103,23 +103,6 @@ data -c config.json ] } } -``` -#### 流控配置 - -之前speed的byte和record配置并不会生效,现在加入流控特性后,byte和record将会生效,byte会限制缓存消息字节数,而record会限制缓存消息条数,如果byte设置过小会导致缓存过小而导致同步数据失败。当byte为0或负数时,限制器将不会工作,例如byte为10485760,现在为10Mb(10*1024*1024)。 -```json -{ - "job":{ - "setting":{ - "speed":{ - "byte":, - "record":10485760, - "channel":4 - } - } - } -} - ``` `reader`和`writer`的配置如下: diff --git a/cmd/datax/enveronment.go b/cmd/datax/enveronment.go index 271c5b7..dd44bea 100644 --- a/cmd/datax/enveronment.go +++ b/cmd/datax/enveronment.go @@ -16,10 +16,15 @@ package main import ( "context" + "fmt" "io/ioutil" + "net/http" + _ "net/http/pprof" + "time" "github.com/Breeze0806/go-etl/config" "github.com/Breeze0806/go-etl/datax" + "github.com/gorilla/handlers" ) type enveronment struct { @@ -28,9 +33,11 @@ type enveronment struct { err error ctx context.Context cancel context.CancelFunc + server *http.Server + addr string } -func newEnveronment(filename string) (e *enveronment) { +func newEnveronment(filename string, addr string) (e *enveronment) { e = &enveronment{} var buf []byte buf, e.err = ioutil.ReadFile(filename) @@ -42,20 +49,82 @@ func newEnveronment(filename string) (e 
*enveronment) { return e } e.ctx, e.cancel = context.WithCancel(context.Background()) + e.addr = addr return e } func (e *enveronment) build() error { + return e.initEngine().initServer().startEngine().err +} + +func (e *enveronment) initEngine() *enveronment { if e.err != nil { - return e.err + return e } e.engine = datax.NewEngine(e.ctx, e.config) + return e +} + +func (e *enveronment) initServer() *enveronment { + if e.err != nil { + return e + } + if e.addr != "" { + r := http.NewServeMux() + recoverHandler := handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)) + r.Handle("/metrics", recoverHandler(newHandler(e.engine))) + e.server = &http.Server{ + Addr: e.addr, + Handler: handlers.CompressHandler(r), + } + go func() { + log.Debugf("listen begin: %v", e.addr) + defer log.Debugf("listen end: %v", e.addr) + if err := e.server.ListenAndServe(); err != nil { + log.Errorf("ListenAndServe fail. addr: %v err: %v", e.addr, err) + } + log.Infof("ListenAndServe success. addr: %v", e.addr) + }() + } + + return e +} + +func (e *enveronment) startEngine() *enveronment { + if e.err != nil { + return e + } + go func() { + statsTimer := time.NewTicker(5 * time.Second) + defer statsTimer.Stop() + exit := false + for { + select { + case <-statsTimer.C: + case <-e.ctx.Done(): + exit = true + default: + } + if e.engine.Container != nil { + fmt.Printf("%v\r", e.engine.Metrics().JSON()) + } + + if exit { + return + } + } + }() e.err = e.engine.Start() - return e.err + + return e } func (e *enveronment) close() { + if e.server != nil { + e.server.Shutdown(e.ctx) + } + if e.cancel != nil { e.cancel() } diff --git a/cmd/datax/examples/limit/config.json b/cmd/datax/examples/limit/config.json index 3e03009..0d5ba99 100644 --- a/cmd/datax/examples/limit/config.json +++ b/cmd/datax/examples/limit/config.json @@ -3,7 +3,7 @@ "container": { "job":{ "id": 1, - "sleepInterval":100 + "sleepInterval":1000 } } }, @@ -31,7 +31,7 @@ ], "setting":{ "speed":{ - "byte":10485760, + 
"byte":204800, "record":1024, "channel":4 } diff --git a/cmd/datax/examples/limit/csv.json b/cmd/datax/examples/limit/csv.json deleted file mode 100644 index 0c58e1f..0000000 --- a/cmd/datax/examples/limit/csv.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "core" : { - "container": { - "job":{ - "id": 1, - "sleepInterval":100 - } - } - }, - "job":{ - "content":[ - { - "reader":{ - "name": "csvreader", - "parameter": { - "path":["examples/limit/src.csv"], - "encoding":"utf-8", - "delimiter":"," - } - }, - "writer":{ - "name": "csvwriter", - "parameter": { - "path":["examples/limit/dest.csv"], - "encoding":"utf-8", - "delimiter":"," - } - }, - "transformer":[] - } - ], - "setting":{ - "speed":{ - "byte":1024, - "record":100, - "channel":4 - } - } - } -} \ No newline at end of file diff --git a/cmd/datax/examples/split/mysql.json b/cmd/datax/examples/split/mysql.json deleted file mode 100644 index 3e858af..0000000 --- a/cmd/datax/examples/split/mysql.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "core" : { - "container": { - "job":{ - "id": 1, - "sleepInterval":100 - } - } - }, - "job":{ - "content":[ - { - "reader":{ - "name": "mysqlreader", - "parameter": { - "username": "root", - "password": "123456", - "split" : { - "key":"str" - }, - "column": ["*"], - "connection": { - "url": "tcp(192.168.15.130:3306)/source?parseTime=false", - "table": { - "db":"source", - "name":"split" - } - }, - "where": "" - } - }, - "writer":{ - "name": "mysqlwriter", - "parameter": { - "username": "root", - "password": "123456", - "writeMode": "insert", - "column": ["*"], - "session": [], - "preSql": [], - "connection": { - "url": "tcp(192.168.15.130:3306)/mysql?parseTime=false", - "table": { - "db":"destination", - "name":"split" - } - }, - "batchTimeout": "1s", - "batchSize":1000 - } - }, - "transformer":[] - } - ], - "setting":{ - "speed":{ - "byte":0, - "record":1024, - "channel":4 - } - } - } -} \ No newline at end of file diff --git a/cmd/datax/handler.go b/cmd/datax/handler.go new file mode 
100644 index 0000000..9301dea --- /dev/null +++ b/cmd/datax/handler.go @@ -0,0 +1,25 @@ +package main + +import ( + "net/http" + + "github.com/Breeze0806/go-etl/datax" +) + +type handler struct { + engine *datax.Engine +} + +func newHandler(engine *datax.Engine) *handler { + return &handler{ + engine: engine, + } +} + +func (h *handler) ServeHTTP(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + if h.engine.Metrics().JSON() == nil { + return + } + w.Write([]byte(h.engine.Metrics().JSON().String())) +} diff --git a/cmd/datax/main.go b/cmd/datax/main.go index 1623e8c..98d4486 100644 --- a/cmd/datax/main.go +++ b/cmd/datax/main.go @@ -26,6 +26,7 @@ func main() { initLog() var configFile = flag.String("c", "config.json", "config") var wizardFile = flag.String("w", "", "wizard") + var httpAddr = flag.String("http", "", "http") flag.Parse() if *wizardFile != "" { if err := tools.NewWizard(*configFile, *wizardFile).GenerateConfigsAndScripts(); err != nil { @@ -38,7 +39,7 @@ func main() { log.Infof("config: %v\n", *configFile) - e := newEnveronment(*configFile) + e := newEnveronment(*configFile, *httpAddr) defer e.close() if err := e.build(); err != nil { fmt.Printf("run fail. 
err: %v\n", err) diff --git a/datax/common/plugin/job_collector.go b/datax/common/plugin/job_collector.go index 013931a..09b13f1 100644 --- a/datax/common/plugin/job_collector.go +++ b/datax/common/plugin/job_collector.go @@ -19,6 +19,6 @@ import "github.com/Breeze0806/go/encoding" //JobCollector 工作信息采集器,用于统计整个工作的进度,错误信息等 //toto 当前未实现监控模块,为此需要在后面来实现这个接口的结构体 type JobCollector interface { - MessageMap() *encoding.JSON - MessageByKey(key string) *encoding.JSON + JSON() *encoding.JSON + JSONByKey(key string) *encoding.JSON } diff --git a/datax/common/plugin/job_test.go b/datax/common/plugin/job_test.go index d2dec75..66ca337 100644 --- a/datax/common/plugin/job_test.go +++ b/datax/common/plugin/job_test.go @@ -24,11 +24,11 @@ import ( type mockJobCollector struct { } -func (m *mockJobCollector) MessageMap() *encoding.JSON { +func (m *mockJobCollector) JSON() *encoding.JSON { return nil } -func (m *mockJobCollector) MessageByKey(key string) *encoding.JSON { +func (m *mockJobCollector) JSONByKey(key string) *encoding.JSON { return nil } diff --git a/datax/core/container.go b/datax/core/container.go index 133c1a0..7103ae9 100644 --- a/datax/core/container.go +++ b/datax/core/container.go @@ -16,16 +16,19 @@ package core import ( "github.com/Breeze0806/go-etl/config" + "github.com/Breeze0806/go-etl/datax/core/statistics/container" ) //Container 容器 type Container interface { Start() error + Metrics() *container.Metrics } //BaseCotainer 基础容器 type BaseCotainer struct { - conf *config.JSON + conf *config.JSON + metrics *container.Metrics } //NewBaseCotainer 创建基础容器 @@ -33,6 +36,16 @@ func NewBaseCotainer() *BaseCotainer { return &BaseCotainer{} } +//SetMetrics 设置指标 +func (b *BaseCotainer) SetMetrics(metrics *container.Metrics) { + b.metrics = metrics +} + +//Metrics 指标 +func (b *BaseCotainer) Metrics() *container.Metrics { + return b.metrics +} + //SetConfig 设置JSON配置 func (b *BaseCotainer) SetConfig(conf *config.JSON) { b.conf = conf diff --git a/datax/core/job/container.go 
b/datax/core/job/container.go index 716634e..792ff10 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -16,7 +16,6 @@ package job import ( "context" - "fmt" "math" "sort" "strconv" @@ -32,6 +31,7 @@ import ( "github.com/Breeze0806/go-etl/datax/common/spi/writer" "github.com/Breeze0806/go-etl/datax/common/util" "github.com/Breeze0806/go-etl/datax/core" + "github.com/Breeze0806/go-etl/datax/core/statistics/container" statplugin "github.com/Breeze0806/go-etl/datax/core/statistics/container/plugin" "github.com/Breeze0806/go-etl/datax/core/taskgroup" "github.com/Breeze0806/go-etl/schedule" @@ -68,10 +68,12 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err ctx: ctx, } c.SetConfig(conf) + c.SetMetrics(container.NewMetrics()) c.jobID = c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerJobID, -1) if c.jobID < 0 { return nil, errors.New("container job id is invalid") } + c.Metrics().Set("jobID", c.jobID) return } @@ -178,7 +180,7 @@ func (c *Container) init() (err error) { writerConfig.Set(coreconst.DataxJobSetting, jobSettingConf) - collector := statplugin.NewDefaultJobCollector() + collector := statplugin.NewDefaultJobCollector(c.Metrics()) c.jobReader, err = c.initReaderJob(collector, readerConfig, writerConfig) if err != nil { return @@ -290,28 +292,26 @@ func (c *Container) schedule() (err error) { goto End } taskGroups = append(taskGroups, taskGroup) - go func(taskGroup *taskgroup.Container) { + go func(taskGroup *taskgroup.Container, i int) { defer func() { - fmt.Printf("\n") c.wg.Done() }() - // timer := time.NewTimer(taskGroup.SleepInterval) - // defer timer.Stop() + statsTimer := time.NewTicker(taskGroup.SleepInterval) + defer statsTimer.Stop() for { select { case taskGroup.Err = <-errChan: + c.setStats(taskGroup, i) return case <-c.ctx.Done(): + c.setStats(taskGroup, i) return - case <-time.After(taskGroup.SleepInterval): + case <-statsTimer.C: + c.setStats(taskGroup, i) } - stats := 
taskGroup.Stats() - for _, v := range stats { - fmt.Printf("%s\r", v.String()) - } - } - }(taskGroup) + } + }(taskGroup, i) } End: c.wg.Wait() @@ -329,6 +329,11 @@ End: return } +func (c *Container) setStats(taskGroup *taskgroup.Container, i int) { + stats := taskGroup.Metrics().JSON().Clone() + c.Metrics().Set("metrics."+strconv.Itoa(i), stats) +} + //post 后置通知 func (c *Container) post() (err error) { if err = c.jobReader.Post(c.ctx); err != nil { diff --git a/datax/core/statistics/container/metrics.go b/datax/core/statistics/container/metrics.go new file mode 100644 index 0000000..c63b9a8 --- /dev/null +++ b/datax/core/statistics/container/metrics.go @@ -0,0 +1,47 @@ +package container + +import ( + "sync" + + "github.com/Breeze0806/go/encoding" +) + +//Metrics json格式指标 +type Metrics struct { + sync.RWMutex + + metricJSON *encoding.JSON +} + +//NewMetrics json格式指标 +func NewMetrics() *Metrics { + j, _ := encoding.NewJSONFromString("{}") + return &Metrics{ + metricJSON: j, + } +} + +//JSON json格式指标 +func (m *Metrics) JSON() *encoding.JSON { + m.RLock() + defer m.RUnlock() + return m.metricJSON +} + +//Set 设置path的value +func (m *Metrics) Set(path string, value interface{}) error { + m.Lock() + defer m.Unlock() + return m.metricJSON.Set(path, value) +} + +//Get 获得path的value +func (m *Metrics) Get(path string) *encoding.JSON { + m.RLock() + defer m.RUnlock() + j, err := m.metricJSON.GetJSON(path) + if err != nil { + return nil + } + return j +} diff --git a/datax/core/statistics/container/plugin/default_job_collector.go b/datax/core/statistics/container/plugin/default_job_collector.go index d6afa10..30532b4 100644 --- a/datax/core/statistics/container/plugin/default_job_collector.go +++ b/datax/core/statistics/container/plugin/default_job_collector.go @@ -16,23 +16,26 @@ package plugin import ( "github.com/Breeze0806/go-etl/datax/common/plugin" + "github.com/Breeze0806/go-etl/datax/core/statistics/container" "github.com/Breeze0806/go/encoding" ) //DefaultJobCollector 
默认工作收集器 -type DefaultJobCollector struct{} +type DefaultJobCollector struct { + metrics *container.Metrics +} //NewDefaultJobCollector 创建默认工作收集器 -func NewDefaultJobCollector() plugin.JobCollector { - return &DefaultJobCollector{} +func NewDefaultJobCollector(metrics *container.Metrics) plugin.JobCollector { + return &DefaultJobCollector{metrics: metrics} } -//MessageMap 空方法 -func (d *DefaultJobCollector) MessageMap() *encoding.JSON { - return nil +//JSON json +func (d *DefaultJobCollector) JSON() *encoding.JSON { + return d.metrics.JSON() } -//MessageByKey 空方法 -func (d *DefaultJobCollector) MessageByKey(key string) *encoding.JSON { - return nil +//JSONByKey 空方法 +func (d *DefaultJobCollector) JSONByKey(key string) *encoding.JSON { + return d.metrics.Get(key) } diff --git a/datax/core/taskgroup/container.go b/datax/core/taskgroup/container.go index 7c14ca4..c29582f 100644 --- a/datax/core/taskgroup/container.go +++ b/datax/core/taskgroup/container.go @@ -16,6 +16,7 @@ package taskgroup import ( "context" + "strconv" "strings" "sync" "time" @@ -23,6 +24,7 @@ import ( "github.com/Breeze0806/go-etl/config" coreconst "github.com/Breeze0806/go-etl/datax/common/config/core" "github.com/Breeze0806/go-etl/datax/core" + "github.com/Breeze0806/go-etl/datax/core/statistics/container" "github.com/Breeze0806/go-etl/schedule" "github.com/pingcap/errors" ) @@ -53,6 +55,7 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err ctx: ctx, } c.SetConfig(conf) + c.SetMetrics(container.NewMetrics()) c.jobID, err = c.Config().GetInt64(coreconst.DataxCoreContainerJobID) if err != nil { return nil, err @@ -61,7 +64,7 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err if err != nil { return nil, err } - + c.Metrics().Set("taskGroupID", c.taskGroupID) c.SleepInterval = time.Duration( c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerJobSleepinterval, 1000)) * time.Millisecond c.retryInterval = time.Duration( @@ -87,17 +90,6 
@@ func (c *Container) Do() error { return c.Start() } -//Stats 获取统计信息 -func (c *Container) Stats() (stats []Stats) { - for _, v := range c.tasks.manager.Runs() { - stat := v.(*taskExecer).Stats() - stat.JobID = c.jobID - stat.TaskGroupID = c.taskGroupID - stats = append(stats, stat) - } - return -} - //Start 开始运行,使用任务调度器执行这些JSON配置 func (c *Container) Start() (err error) { log.Infof("datax job(%v) taskgruop(%v) start", c.jobID, c.taskGroupID) @@ -197,29 +189,46 @@ func (c *Container) startTaskExecer(te *taskExecer) (err error) { log.Debugf("datax job(%v) taskgruop(%v) task(%v) start", c.jobID, c.taskGroupID, te.Key()) go func(te *taskExecer) { defer c.wg.Done() - timer := time.NewTimer(c.retryInterval) - defer timer.Stop() - select { - case te.Err = <-errChan: - //当失败时,重试次数不超过最大重试次数,写入任务是否支持失败冲时,这些决定写入任务是否冲时 - if te.Err != nil && te.WriterSuportFailOverport() && te.AttemptCount() <= c.retryMaxCount { - log.Debugf("datax job(%v) taskgruop(%v) task(%v) shutdown and retry. attemptCount: %v err: %v", - c.jobID, c.taskGroupID, te.Key(), te.AttemptCount(), te.Err) - //关闭任务 - te.Shutdown() - select { - case <-timer.C: - case <-c.ctx.Done(): + statsTimer := time.NewTicker(c.SleepInterval) + defer statsTimer.Stop() + for { + select { + case te.Err = <-errChan: + //当失败时,重试次数不超过最大重试次数,写入任务是否支持失败冲时,这些决定写入任务是否冲时 + if te.Err != nil && te.WriterSuportFailOverport() && te.AttemptCount() <= c.retryMaxCount { + log.Debugf("datax job(%v) taskgruop(%v) task(%v) shutdown and retry. 
attemptCount: %v err: %v", + c.jobID, c.taskGroupID, te.Key(), te.AttemptCount(), te.Err) + //关闭任务 + te.Shutdown() + timer := time.NewTimer(c.retryInterval) + defer timer.Stop() + select { + case <-timer.C: + case <-c.ctx.Done(): + return + } + //从运行队列移到待执行队列 + c.tasks.removeRunAndPushRemain(te) + } else { + log.Debugf("datax job(%v) taskgruop(%v) task(%v) end", c.jobID, c.taskGroupID, te.Key()) + //从任务调度器移除 + c.tasks.removeRun(te) + c.setStats(te) + return } - //从运行队列移到待执行队列 - c.tasks.removeRunAndPushRemain(te) - } else { - log.Debugf("datax job(%v) taskgruop(%v) task(%v) end", c.jobID, c.taskGroupID, te.Key()) - //从任务调度器移除 - c.tasks.removeRun(te) + case <-c.ctx.Done(): + return + case <-statsTimer.C: + c.setStats(te) } - case <-c.ctx.Done(): } }(te) return } + +func (c *Container) setStats(te *taskExecer) { + key := "metrics." + strconv.FormatInt(te.taskID, 10) + stats := te.Stats() + + c.Metrics().Set(key, stats) +} diff --git a/datax/core/taskgroup/task_execer.go b/datax/core/taskgroup/task_execer.go index 9bbfd2b..5ca3928 100644 --- a/datax/core/taskgroup/task_execer.go +++ b/datax/core/taskgroup/task_execer.go @@ -251,10 +251,8 @@ Loop: //Stats 统计信息 type Stats struct { - JobID int64 `json:"jobID"` - TaskGroupID int64 `json:"taskGroupID"` - TaskID int64 `json:"taskID"` - Channel channel.StatsJSON `json:"channel"` + TaskID int64 `json:"taskID"` + Channel channel.StatsJSON `json:"channel"` } func (s *Stats) String() string { diff --git a/datax/plugin/reader/resources/plugin.json b/datax/plugin/reader/resources/plugin.json deleted file mode 100644 index 0564ae7..0000000 --- a/datax/plugin/reader/resources/plugin.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name" : "reader", - "developer":"Breeze0806", - "dialect": "", - "description":"" -} \ No newline at end of file diff --git a/datax/plugin/writer/resources/plugin.json b/datax/plugin/writer/resources/plugin.json deleted file mode 100644 index fabb7c3..0000000 --- a/datax/plugin/writer/resources/plugin.json +++ 
/dev/null @@ -1,6 +0,0 @@ -{ - "name" : "writer", - "developer":"Breeze0806", - "dialect":"", - "description":"" -} \ No newline at end of file diff --git a/go.mod b/go.mod index dddb6a8..64acdb3 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/go-sql-driver/mysql v1.6.0 github.com/godror/godror v0.33.3 github.com/google/uuid v1.3.0 + github.com/gorilla/handlers v1.5.1 github.com/ibmdb/go_ibm_db v0.4.1 github.com/lib/pq v1.10.7 github.com/pingcap/errors v0.11.4 diff --git a/go.sum b/go.sum index 32e15c1..14f7919 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/denisenkom/go-mssqldb v0.12.3 h1:pBSGx9Tq67pBOTLmxNuirNTeB8Vjmf886Kx+8Y+8shw= github.com/denisenkom/go-mssqldb v0.12.3/go.mod h1:k0mtMFOnU+AihqFxPMiF05rtiDrorD1Vrm1KEz5hxDo= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= +github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= +github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= @@ -32,6 +34,8 @@ github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4= +github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= github.com/ibmdb/go_ibm_db v0.4.1 h1:IYZqoKTzD9xtkzLIkp8u6zzg7/4v7nFOfHzF79agvak= 
github.com/ibmdb/go_ibm_db v0.4.1/go.mod h1:nl5aUh1IzBVExcqYXaZLApaq8RUvTEph3VP49UTmEvg= github.com/lib/pq v1.10.5/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= From c743b157f3b0017409302cff4520bd059a10398e Mon Sep 17 00:00:00 2001 From: Breeze0806 Date: Tue, 20 Jun 2023 22:41:26 +0800 Subject: [PATCH 10/10] =?UTF-8?q?docs=20&=20test=20&=20fix:=20=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E7=9B=91=E6=8E=A7=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E5=A4=8DtaskExecer=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E7=9A=84bug,=E4=BF=AE=E6=94=B9=E6=96=87=E6=A1=A3=E8=AF=B4?= =?UTF-8?q?=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 32 +--- README_USER.md | 10 +- cmd/datax/handler.go | 14 ++ datax/core/container_test.go | 36 +++++ datax/core/job/container.go | 22 +-- datax/core/job/container_test.go | 60 +++---- datax/core/statistics/container/metrics.go | 18 ++- .../core/statistics/container/metrics_test.go | 151 ++++++++++++++++++ .../container/plugin/default_job_collector.go | 4 +- .../plugin/default_job_collector_test.go | 79 +++++++++ datax/core/taskgroup/container.go | 42 ++--- datax/core/taskgroup/container_test.go | 73 +++++---- datax/core/taskgroup/help_test.go | 2 + 13 files changed, 423 insertions(+), 120 deletions(-) create mode 100644 datax/core/statistics/container/metrics_test.go create mode 100644 datax/core/statistics/container/plugin/default_job_collector_test.go diff --git a/README.md b/README.md index 83afe07..97de94b 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,6 @@ go-etl将提供的etl能力如下: ### datax 本包将提供类似于阿里巴巴[DataX](https://github.com/alibaba/DataX)的接口去实现go的etl框架,目前主要实现了job框架内的数据同步能力. 
- -#### plan - -- [x] 实现关系型数据库的任务切分 -- [x] 实现监控模块 -- [x] 实现流控模块 - ### storage #### database @@ -64,28 +57,11 @@ go-etl将提供的etl能力如下: 主要用于字节流的解析,如文件,消息队列,elasticsearch等,字节流格式可以是cvs,json, xml等 #### file - -#### mq - -##### plan - -暂无时间安排计划,欢迎来实现 - -#### elasticsearch - -##### plan - -暂无时间安排计划,欢迎来实现 +主要用于文件的解析,如cvs,excel等 ### transform -主要用于类sql数据转化 - -#### plan - -- [ ] 引入tidb数据库的mysql解析能力 -- [ ] 引入tidb数据库的mysql函数计算能力 -- [ ] 运用mysql解析能力和mysql函数计算能力实现数据转化能力 +主要用于类sql数据转化,类似百度引擎 ### tools @@ -106,6 +82,10 @@ go generate ./... 数据源插件模板新增工具,用于新增一个reader或writer模板,配合发布命令使用,减少开发者负担 +##### plugin + +数据源插件打包工具 + #### license 用于自动新增go代码文件中许可证 diff --git a/README_USER.md b/README_USER.md index ba9ee55..f331cbe 100644 --- a/README_USER.md +++ b/README_USER.md @@ -429,13 +429,17 @@ datax -h 帮助显示 ```bash -Usage of datax: - -c string #数据源配置文件 +Usage of datax: + -c string config (default "config.json") - -w string #源目的配置向导文件 + -http string + http + -w string wizard ``` +-http 新增监听端口,如:8080, 开启后访问127.0.0.1:8080/metrics获取实时的吞吐量 + #### 2.3.2 查看版本 ``` diff --git a/cmd/datax/handler.go b/cmd/datax/handler.go index 9301dea..2608afc 100644 --- a/cmd/datax/handler.go +++ b/cmd/datax/handler.go @@ -1,3 +1,17 @@ +// Copyright 2020 the go-etl Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package main import ( diff --git a/datax/core/container_test.go b/datax/core/container_test.go index 2d0218a..24eb1da 100644 --- a/datax/core/container_test.go +++ b/datax/core/container_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/Breeze0806/go-etl/config" + "github.com/Breeze0806/go-etl/datax/core/statistics/container" ) func testJSONFromString(s string) *config.JSON { @@ -57,3 +58,38 @@ func TestBaseCotainer_SetConfig(t *testing.T) { }) } } + +func TestBaseCotainer_SetGetMetrics(t *testing.T) { + type testStruct struct { + Path string `json:"path"` + } + + type args struct { + metrics *container.Metrics + } + m := container.NewMetrics() + m.Set("test", testStruct{Path: "value"}) + tests := []struct { + name string + b *BaseCotainer + args args + want string + }{ + { + name: "1", + b: NewBaseCotainer(), + args: args{ + metrics: m, + }, + want: `{"test":{"path":"value"}}`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.b.SetMetrics(tt.args.metrics) + if got := tt.b.Metrics().JSON().String(); got != tt.want { + t.Errorf("Metrics() = %v, want: %v", got, tt.want) + } + }) + } +} diff --git a/datax/core/job/container.go b/datax/core/job/container.go index 792ff10..6c59050 100644 --- a/datax/core/job/container.go +++ b/datax/core/job/container.go @@ -54,6 +54,7 @@ type Container struct { endTransferTimeStamp int64 needChannelNumber int64 totalStage int + reportInterval time.Duration //todo ErrorRecordChecker未使用 errorLimit util.ErrorRecordChecker taskSchduler *schedule.TaskSchduler @@ -73,6 +74,7 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err if c.jobID < 0 { return nil, errors.New("container job id is invalid") } + c.reportInterval = time.Duration(c.Config().GetFloat64OrDefaullt(coreconst.DataxCoreContainerJobReportinterval, 1)) * time.Second c.Metrics().Set("jobID", c.jobID) return } @@ -85,38 +87,38 @@ func (c *Container) Start() (err error) { log.Debugf("DataX jobContainer %v starts 
to preHandle.", c.jobID) if err = c.preHandle(); err != nil { - log.Errorf("DataX jobContainer %v preHandle failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v preHandle failed. err: %v", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to init.", c.jobID) if err = c.init(); err != nil { - log.Errorf("DataX jobContainer %v init failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v init failed. err: %v", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to prepare.", c.jobID) if err = c.prepare(); err != nil { - log.Errorf("DataX jobContainer %v prepare failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v prepare failed. err: %v", c.jobID, err) return } - log.Infof("DataX jobContainer %v starts to split.", c.jobID) + log.Infof("DataX jobContainer %v starts to split. err: %v", c.jobID) if err = c.split(); err != nil { - log.Errorf("DataX jobContainer %v split failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v split failed. err: %v", c.jobID, err) return } - log.Infof("DataX jobContainer %v starts to schedule.", c.jobID) + log.Infof("DataX jobContainer %v starts to schedule. err: %v", c.jobID) if err = c.schedule(); err != nil { - log.Errorf("DataX jobContainer %v schedule failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v schedule failed. err: %v", c.jobID, err) return } log.Infof("DataX jobContainer %v starts to post.", c.jobID) if err = c.post(); err != nil { - log.Errorf("DataX jobContainer %v post failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v post failed. err: %v", c.jobID, err) return } log.Debugf("DataX jobContainer %v starts to postHandle.", c.jobID) if err = c.postHandle(); err != nil { - log.Errorf("DataX jobContainer %v postHandle failed.", c.jobID, err) + log.Errorf("DataX jobContainer %v postHandle failed. 
err: %v", c.jobID, err) return } @@ -296,7 +298,7 @@ func (c *Container) schedule() (err error) { defer func() { c.wg.Done() }() - statsTimer := time.NewTicker(taskGroup.SleepInterval) + statsTimer := time.NewTicker(c.reportInterval) defer statsTimer.Stop() for { select { diff --git a/datax/core/job/container_test.go b/datax/core/job/container_test.go index 1e49997..392aa30 100644 --- a/datax/core/job/container_test.go +++ b/datax/core/job/container_test.go @@ -44,7 +44,7 @@ func TestNewContainer(t *testing.T) { "container": { "job":{ "id": -3, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -67,7 +67,7 @@ func TestNewContainer(t *testing.T) { "container": { "job":{ "id": "1000", - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -110,7 +110,7 @@ func TestContainer_preHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -135,7 +135,7 @@ func TestContainer_preHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -157,7 +157,7 @@ func TestContainer_preHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -183,7 +183,7 @@ func TestContainer_preHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -209,7 +209,7 @@ func TestContainer_preHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -251,7 +251,7 @@ func TestContainer_postHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -276,7 +276,7 @@ func TestContainer_postHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, 
"taskGroup":{ "id": 30000001, @@ -298,7 +298,7 @@ func TestContainer_postHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -324,7 +324,7 @@ func TestContainer_postHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -350,7 +350,7 @@ func TestContainer_postHandle(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -406,7 +406,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -443,7 +443,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -471,7 +471,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -503,7 +503,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -537,7 +537,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -572,7 +572,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -610,7 +610,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -648,7 +648,7 @@ func TestContainer_init(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -686,7 +686,7 @@ func TestContainer_init(t *testing.T) { "container": { 
"job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -755,7 +755,7 @@ func TestContainer_prepare(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -792,7 +792,7 @@ func TestContainer_prepare(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -830,7 +830,7 @@ func TestContainer_prepare(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -899,7 +899,7 @@ func TestContainer_post(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -936,7 +936,7 @@ func TestContainer_post(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -974,7 +974,7 @@ func TestContainer_post(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -1042,7 +1042,7 @@ func TestContainer_destroy(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -1079,7 +1079,7 @@ func TestContainer_destroy(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, @@ -1117,7 +1117,7 @@ func TestContainer_destroy(t *testing.T) { "container": { "job":{ "id": 1, - "sleepInterval":100 + "reportInterval":100 }, "taskGroup":{ "id": 30000001, diff --git a/datax/core/statistics/container/metrics.go b/datax/core/statistics/container/metrics.go index c63b9a8..6ea777c 100644 --- a/datax/core/statistics/container/metrics.go +++ b/datax/core/statistics/container/metrics.go @@ -1,3 +1,17 @@ +// Copyright 2020 the go-etl Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package container import ( @@ -36,10 +50,10 @@ func (m *Metrics) Set(path string, value interface{}) error { } //Get 获得path的value -func (m *Metrics) Get(path string) *encoding.JSON { +func (m *Metrics) Get(key string) *encoding.JSON { m.RLock() defer m.RUnlock() - j, err := m.metricJSON.GetJSON(path) + j, err := m.metricJSON.GetJSON(key) if err != nil { return nil } diff --git a/datax/core/statistics/container/metrics_test.go b/datax/core/statistics/container/metrics_test.go new file mode 100644 index 0000000..0a7cc9d --- /dev/null +++ b/datax/core/statistics/container/metrics_test.go @@ -0,0 +1,151 @@ +// Copyright 2020 the go-etl Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package container + +import ( + "reflect" + "testing" + + "github.com/Breeze0806/go/encoding" +) + +func testJSON(json string) *encoding.JSON { + j, _ := encoding.NewJSONFromString(json) + return j +} + +func TestMetrics_JSON(t *testing.T) { + tests := []struct { + name string + m *Metrics + want *encoding.JSON + }{ + { + name: "1", + m: &Metrics{ + metricJSON: testJSON(`{"test":"metrics"}`), + }, + want: testJSON(`{"test":"metrics"}`), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.m.JSON(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("Metrics.JSON() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNewMetrics(t *testing.T) { + tests := []struct { + name string + want *Metrics + }{ + { + name: "1", + want: &Metrics{ + metricJSON: testJSON(`{}`), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := NewMetrics(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewMetrics() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMetrics_Set(t *testing.T) { + type args struct { + path string + value interface{} + } + tests := []struct { + name string + m *Metrics + args args + want string + wantErr bool + }{ + { + name: "1", + m: NewMetrics(), + args: args{ + path: "path", + value: "value", + }, + want: "value", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.m.Set(tt.args.path, tt.args.value); (err != nil) != tt.wantErr { + t.Errorf("Metrics.Set() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got, _ := tt.m.JSON().GetString(tt.args.path); got != tt.want { + t.Errorf("Metrics.Set() got = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMetrics_Get(t *testing.T) { + type args struct { + key string + } + tests := []struct { + name string + m *Metrics + args args + want *encoding.JSON + }{ + { + name: "1", + m: &Metrics{ + metricJSON: testJSON(`{"test":{"path":"value"}}`), + }, + args: args{ + 
key: "test", + }, + want: testJSON(`{"path":"value"}`), + }, + { + name: "1", + m: &Metrics{ + metricJSON: testJSON(`{"test":{"path":"value"}}`), + }, + args: args{ + key: "test.path", + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.m.Get(tt.args.key) + if tt.want == nil && got == nil { + return + } + if got.String() != tt.want.String() { + t.Errorf("Metrics.Get() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/datax/core/statistics/container/plugin/default_job_collector.go b/datax/core/statistics/container/plugin/default_job_collector.go index 30532b4..b297021 100644 --- a/datax/core/statistics/container/plugin/default_job_collector.go +++ b/datax/core/statistics/container/plugin/default_job_collector.go @@ -30,12 +30,12 @@ func NewDefaultJobCollector(metrics *container.Metrics) plugin.JobCollector { return &DefaultJobCollector{metrics: metrics} } -//JSON json +//JSON 获取json的指标 func (d *DefaultJobCollector) JSON() *encoding.JSON { return d.metrics.JSON() } -//JSONByKey 空方法 +//JSONByKey 获取关键字是key的json的指标 func (d *DefaultJobCollector) JSONByKey(key string) *encoding.JSON { return d.metrics.Get(key) } diff --git a/datax/core/statistics/container/plugin/default_job_collector_test.go b/datax/core/statistics/container/plugin/default_job_collector_test.go new file mode 100644 index 0000000..a45eba0 --- /dev/null +++ b/datax/core/statistics/container/plugin/default_job_collector_test.go @@ -0,0 +1,79 @@ +// Copyright 2020 the go-etl Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package plugin + +import ( + "reflect" + "testing" + + "github.com/Breeze0806/go-etl/datax/core/statistics/container" +) + +type testStruct struct { + Path string `json:"path"` +} + +func TestDefaultJobCollector_JSON(t *testing.T) { + m := container.NewMetrics() + m.Set("test", testStruct{Path: "value"}) + tests := []struct { + name string + d *DefaultJobCollector + want string + }{ + { + name: "1", + d: NewDefaultJobCollector(m).(*DefaultJobCollector), + want: `{"test":{"path":"value"}}`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.d.JSON().String(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("DefaultJobCollector.JSON() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestDefaultJobCollector_JSONByKey(t *testing.T) { + m := container.NewMetrics() + m.Set("test", testStruct{Path: "value"}) + type args struct { + key string + } + tests := []struct { + name string + d *DefaultJobCollector + args args + want string + }{ + { + name: "1", + d: NewDefaultJobCollector(m).(*DefaultJobCollector), + args: args{ + key: "test", + }, + want: `{"path":"value"}`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.d.JSONByKey(tt.args.key).String(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("DefaultJobCollector.JSONByKey() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/datax/core/taskgroup/container.go b/datax/core/taskgroup/container.go index c29582f..8ce670b 100644 --- a/datax/core/taskgroup/container.go +++ b/datax/core/taskgroup/container.go @@ -35,15 +35,15 @@ type Container struct { Err error - jobID int64 - taskGroupID int64 - scheduler *schedule.TaskSchduler - wg sync.WaitGroup - tasks *taskManager - ctx context.Context - SleepInterval time.Duration - retryInterval time.Duration - retryMaxCount int32 + jobID int64 + taskGroupID int64 + scheduler 
*schedule.TaskSchduler + wg sync.WaitGroup + tasks *taskManager + ctx context.Context + ReportInterval time.Duration + retryInterval time.Duration + retryMaxCount int32 } //NewContainer 根据JSON配置conf创建任务组容器 @@ -65,13 +65,13 @@ func NewContainer(ctx context.Context, conf *config.JSON) (c *Container, err err return nil, err } c.Metrics().Set("taskGroupID", c.taskGroupID) - c.SleepInterval = time.Duration( - c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerJobSleepinterval, 1000)) * time.Millisecond + c.ReportInterval = time.Duration( + c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskGroupReportinterval, 1)) * time.Second c.retryInterval = time.Duration( - c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 10000)) * time.Millisecond + c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverRetryintervalinmsec, 1000)) * time.Millisecond c.retryMaxCount = int32(c.Config().GetInt64OrDefaullt(coreconst.DataxCoreContainerTaskFailoverMaxretrytimes, 1)) - log.Infof("datax job(%v) taskgruop(%v) sleepInterval: %v retryInterval: %v retryMaxCount: %v config: %v", - c.jobID, c.taskGroupID, c.SleepInterval, c.retryInterval, c.retryMaxCount, conf) + log.Infof("datax job(%v) taskgruop(%v) reportInterval: %v retryInterval: %v retryMaxCount: %v config: %v", + c.jobID, c.taskGroupID, c.ReportInterval, c.retryInterval, c.retryMaxCount, conf) return } @@ -127,7 +127,7 @@ func (c *Container) Start() (err error) { } } log.Infof("datax job(%v) taskgruop(%v) manage tasks", c.jobID, c.taskGroupID) - ticker := time.NewTicker(c.SleepInterval) + ticker := time.NewTicker(c.ReportInterval) defer ticker.Stop() QueueLoop: //任务队列不为空 @@ -186,10 +186,13 @@ func (c *Container) startTaskExecer(te *taskExecer) (err error) { c.wg.Done() return err } - log.Debugf("datax job(%v) taskgruop(%v) task(%v) start", c.jobID, c.taskGroupID, te.Key()) go func(te *taskExecer) { - defer c.wg.Done() - statsTimer := time.NewTicker(c.SleepInterval) + 
log.Debugf("datax job(%v) taskgruop(%v) task(%v) start", c.jobID, c.taskGroupID, te.Key()) + defer func() { + log.Debugf("datax job(%v) taskgruop(%v) task(%v) end", c.jobID, c.taskGroupID, te.Key()) + c.wg.Done() + }() + statsTimer := time.NewTicker(c.ReportInterval) defer statsTimer.Stop() for { select { @@ -210,12 +213,11 @@ func (c *Container) startTaskExecer(te *taskExecer) (err error) { //从运行队列移到待执行队列 c.tasks.removeRunAndPushRemain(te) } else { - log.Debugf("datax job(%v) taskgruop(%v) task(%v) end", c.jobID, c.taskGroupID, te.Key()) //从任务调度器移除 c.tasks.removeRun(te) c.setStats(te) - return } + return case <-c.ctx.Done(): return case <-statsTimer.C: diff --git a/datax/core/taskgroup/container_test.go b/datax/core/taskgroup/container_test.go index 3a7dab0..6573f30 100644 --- a/datax/core/taskgroup/container_test.go +++ b/datax/core/taskgroup/container_test.go @@ -40,11 +40,13 @@ func TestContainer_Do(t *testing.T) { "core" : { "container": { "job":{ - "id": 1, - "sleepInterval":100 + "id": 1 }, "taskGroup":{ "id": 1, + "reportInterval":1 + }, + "task":{ "failover":{ "retryIntervalInMsec":10 } @@ -115,11 +117,12 @@ func TestContainer_DoCancel1(t *testing.T) { "core" : { "container": { "job":{ - "id": 1, - "sleepInterval":100 + "id": 1 }, "taskGroup":{ - "id": 1, + "id": 1 + }, + "task":{ "failover":{ "retryIntervalInMsec":10 } @@ -164,11 +167,13 @@ func TestContainer_DoCancel2(t *testing.T) { "core" : { "container": { "job":{ - "id": 1, - "sleepInterval":100 + "id": 1 }, "taskGroup":{ "id": 1, + "reportInterval":1 + }, + "task":{ "failover":{ "retryIntervalInMsec":10 } @@ -196,7 +201,7 @@ func TestContainer_DoCancel2(t *testing.T) { cancel() }() - if err := c.Do(); err != context.Canceled { + if err := c.Do(); err == nil { t.Errorf("Do error: %v", err) } } @@ -214,12 +219,14 @@ func TestContainer_JobId(t *testing.T) { "container": { "job":{ "id": 30000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 1, + "id": 1 + }, + "task":{ 
"failover":{ - "retryIntervalInMsec":0 + "retryIntervalInMsec":10 } } } @@ -234,12 +241,14 @@ func TestContainer_JobId(t *testing.T) { "container": { "job":{ "id": 1000000000000000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 1, + "id": 1 + }, + "task":{ "failover":{ - "retryIntervalInMsec":0 + "retryIntervalInMsec":10 } } } @@ -271,10 +280,12 @@ func TestContainer_TaskGroupId(t *testing.T) { "container": { "job":{ "id": 30000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 30000001, + "id": 30000001 + }, + "task":{ "failover":{ "retryIntervalInMsec":0 } @@ -291,7 +302,7 @@ func TestContainer_TaskGroupId(t *testing.T) { "container": { "job":{ "id": 1000000000000000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ "id": 1000000000000000001, @@ -327,10 +338,12 @@ func TestContainer_Start(t *testing.T) { "container": { "job":{ "id": 30000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 30000001, + "id": 30000001 + }, + "task":{ "failover":{ "retryIntervalInMsec":0 } @@ -370,10 +383,12 @@ func TestNewContainer(t *testing.T) { "container": { "job":{ "id": "30000000", - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 30000001, + "id": 30000001 + }, + "task":{ "failover":{ "retryIntervalInMsec":0 } @@ -394,12 +409,14 @@ func TestNewContainer(t *testing.T) { "container": { "job":{ "id": 30000002, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": "30000001", + "id": "30000001" + }, + "task":{ "failover":{ - "retryIntervalInMsec":0 + "retryIntervalInMsec":10 } } } @@ -438,12 +455,14 @@ func TestContainer_startTaskExecer(t *testing.T) { "container": { "job":{ "id": 30000000, - "sleepInterval":100 + "reportInterval":1 }, "taskGroup":{ - "id": 30000001, + "id": 30000001 + }, + "task":{ "failover":{ - "retryIntervalInMsec":0 + "retryIntervalInMsec":10 } } } diff --git a/datax/core/taskgroup/help_test.go b/datax/core/taskgroup/help_test.go index 
5ff0dda..5e1a20e 100644 --- a/datax/core/taskgroup/help_test.go +++ b/datax/core/taskgroup/help_test.go @@ -25,6 +25,7 @@ import ( "github.com/Breeze0806/go-etl/datax/common/plugin/loader" "github.com/Breeze0806/go-etl/datax/common/spi/reader" "github.com/Breeze0806/go-etl/datax/common/spi/writer" + "github.com/Breeze0806/go-etl/element" ) type mockPlugin struct { @@ -120,6 +121,7 @@ func newMockRandReaderTask(errs []error) *mockRandReaderTask { } func (m *mockRandReaderTask) StartRead(ctx context.Context, sender plugin.RecordSender) error { + defer sender.SendWriter(element.GetTerminateRecord()) if x := m.rand.Int31n(math.MaxInt16); x < math.MaxInt16/2 { return m.startReadErr }