This repository has been archived by the owner on Dec 8, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 66
mydumper: fix parquet data parser #435
Merged
Merged
Changes from 4 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
185c16b
fix parquet
glorv 17a8f46
Merge branch 'master' into fix-parquet
glorv f103606
reorder imports
glorv 44ec8ed
fix test
glorv b3d977d
use empty collation
glorv 1eef2d8
fix an error and add more test cases
glorv ad2c941
Merge branch 'master' into fix-parquet
glorv 9c69990
add pointer type tests
glorv cd9d071
resolve comments
glorv cab1c8c
Merge branch 'master' into fix-parquet
glorv 6bcff1a
Merge branch 'master' into fix-parquet
glorv 5f43bd6
Merge branch 'master' into fix-parquet
kennytm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -6,11 +6,11 @@ import ( | |||
"path/filepath" | ||||
"strconv" | ||||
|
||||
"github.com/pingcap/br/pkg/storage" | ||||
|
||||
"github.com/pingcap/tidb/types" | ||||
"github.com/pingcap/parser/mysql" | ||||
|
||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
"github.com/pingcap/br/pkg/storage" | ||||
. "github.com/pingcap/check" | ||||
"github.com/pingcap/tidb/types" | ||||
"github.com/xitongsys/parquet-go-source/local" | ||||
writer2 "github.com/xitongsys/parquet-go/writer" | ||||
) | ||||
|
@@ -57,7 +57,7 @@ func (s testParquetParserSuite) TestParquetParser(c *C) { | |||
verifyRow := func(i int) { | ||||
c.Assert(reader.lastRow.RowID, Equals, int64(i+1)) | ||||
c.Assert(len(reader.lastRow.Row), Equals, 2) | ||||
c.Assert(reader.lastRow.Row[0], DeepEquals, types.NewCollationStringDatum(strconv.Itoa(i), "", 0)) | ||||
c.Assert(reader.lastRow.Row[0], DeepEquals, types.NewCollationStringDatum(strconv.Itoa(i), mysql.DefaultCollationName, 0)) | ||||
c.Assert(reader.lastRow.Row[1], DeepEquals, types.NewIntDatum(int64(i))) | ||||
} | ||||
|
||||
|
@@ -81,3 +81,69 @@ func (s testParquetParserSuite) TestParquetParser(c *C) { | |||
|
||||
c.Assert(reader.ReadRow(), Equals, io.EOF) | ||||
} | ||||
|
||||
// TestParquetVariousTypes writes a single-row parquet file whose columns use
// the DATE, TIME_*, TIMESTAMP_* and DECIMAL annotations (the latter over
// INT32, INT64, FIXED_LEN_BYTE_ARRAY and BYTE_ARRAY base types), reads it back
// with NewParquetParser and asserts that each column is returned as a string
// datum holding the formatted value.
func (s testParquetParserSuite) TestParquetVariousTypes(c *C) { | ||||
// Test is the row schema; each field carries the parquet tag for one annotated type.
type Test struct { | ||||
Date int32 `parquet:"name=date, type=DATE"` | ||||
TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"` | ||||
TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"` | ||||
TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"` | ||||
TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"` | ||||
|
||||
Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=2, precision=9, basetype=INT32"` | ||||
Decimal2 int32 `parquet:"name=decimal2, type=DECIMAL, scale=4, precision=4, basetype=INT32"` | ||||
Decimal3 int64 `parquet:"name=decimal3, type=DECIMAL, scale=2, precision=18, basetype=INT64"` | ||||
Decimal4 string `parquet:"name=decimal4, type=DECIMAL, scale=2, precision=10, basetype=FIXED_LEN_BYTE_ARRAY, length=12"` | ||||
Decimal5 string `parquet:"name=decimal5, type=DECIMAL, scale=2, precision=20, basetype=BYTE_ARRAY"` | ||||
} | ||||
|
||||
dir := c.MkDir() | ||||
// Prepare data: create a parquet file in the temporary directory and a writer for the Test schema. | ||||
name := "test123.parquet" | ||||
testPath := filepath.Join(dir, name) | ||||
pf, err := local.NewLocalFileWriter(testPath) | ||||
c.Assert(err, IsNil) | ||||
test := &Test{} | ||||
writer, err := writer2.NewParquetWriter(pf, test, 2) | ||||
c.Assert(err, IsNil) | ||||
|
||||
// The single row written to the file; trailing comments give the value each raw number encodes.
v := &Test{ | ||||
Date: 18564, // 2020-10-29 | ||||
TimeMillis: 62775123, // 17:26:15.123 | ||||
TimeMicros: 62775123000, // 17:26:15.123 | ||||
TimestampMillis: 1603963672356, // 2020-10-29T17:27:52.356 | ||||
TimestampMicros: 1603963672356956, // 2020-10-29T17:27:52.356956 | ||||
Decimal1: -12345678, // -123456.78 | ||||
Decimal2: 456, // 0.0456 | ||||
Decimal3: 123456789012345678, // 1234567890123456.78 | ||||
kennytm marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
Decimal4: "-12345678.09", | ||||
Decimal5: "-1234567890123456.78", | ||||
} | ||||
c.Assert(writer.Write(v), IsNil) | ||||
c.Assert(writer.WriteStop(), IsNil) | ||||
c.Assert(pf.Close(), IsNil) | ||||
|
||||
// Read the file back through local storage and the parquet parser under test.
store, err := storage.NewLocalStorage(dir) | ||||
c.Assert(err, IsNil) | ||||
r, err := store.Open(context.TODO(), name) | ||||
c.Assert(err, IsNil) | ||||
reader, err := NewParquetParser(context.TODO(), store, r, name) | ||||
c.Assert(err, IsNil) | ||||
defer reader.Close() | ||||
|
||||
// One column is expected per field of Test.
c.Assert(len(reader.columns), Equals, 10) | ||||
|
||||
// Every value is expected to come back as a string datum, in field order.
c.Assert(reader.ReadRow(), IsNil) | ||||
c.Assert(reader.lastRow.Row, DeepEquals, []types.Datum{ | ||||
types.NewStringDatum("2020-10-29"), | ||||
types.NewStringDatum("17:26:15.123"), | ||||
types.NewStringDatum("17:26:15.123"), | ||||
types.NewStringDatum("2020-10-29 17:27:52.356"), | ||||
// NOTE(review): the TimestampMicros input carries .356956 but the expected
// string has millisecond precision — confirm the truncation is intended.
types.NewStringDatum("2020-10-29 17:27:52.356"), | ||||
types.NewStringDatum("-123456.78"), | ||||
types.NewStringDatum("0.0456"), | ||||
types.NewStringDatum("1234567890123456.78"), | ||||
types.NewStringDatum("-12345678.09"), | ||||
types.NewStringDatum("-1234567890123456.78"), | ||||
}) | ||||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does
mysql.DefaultCollationName
fit all scenarios?