Skip to content

Commit

Permalink
Merge pull request #180 from mhivestasoffshore/feature/decimal-bugfix
Browse files: browse the repository at this point in the history
Fixing problems with high scale decimals
  • Loading branch information
xitongsys committed Oct 23, 2019
2 parents 7e0eab7 + ee70c33 commit 7fc2d91
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 48 deletions.
38 changes: 19 additions & 19 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,32 +70,32 @@ func TypeNameToParquetType(name string, baseName string) (*parquet.Type, *parque
}

func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionType) reflect.Type {
if rT==nil || *rT != parquet.FieldRepetitionType_OPTIONAL {
if rT == nil || *rT != parquet.FieldRepetitionType_OPTIONAL {
if *pT == parquet.Type_BOOLEAN {
return reflect.TypeOf(true)

}else if *pT == parquet.Type_INT32 {
} else if *pT == parquet.Type_INT32 {
return reflect.TypeOf(int32(0))

}else if *pT == parquet.Type_INT64 {
} else if *pT == parquet.Type_INT64 {
return reflect.TypeOf(int64(0))

}else if *pT == parquet.Type_INT96 {
} else if *pT == parquet.Type_INT96 {
return reflect.TypeOf("")

}else if *pT == parquet.Type_FLOAT {
} else if *pT == parquet.Type_FLOAT {
return reflect.TypeOf(float32(0))

}else if *pT == parquet.Type_DOUBLE {
} else if *pT == parquet.Type_DOUBLE {
return reflect.TypeOf(float64(0))

}else if *pT == parquet.Type_BYTE_ARRAY {
} else if *pT == parquet.Type_BYTE_ARRAY {
return reflect.TypeOf("")

}else if *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
} else if *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
return reflect.TypeOf("")

}else {
} else {
return nil
}

Expand All @@ -104,35 +104,35 @@ func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionTyp
v := true
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_INT32 {
} else if *pT == parquet.Type_INT32 {
v := int32(0)
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_INT64 {
} else if *pT == parquet.Type_INT64 {
v := int64(0)
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_INT96 {
} else if *pT == parquet.Type_INT96 {
v := ""
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_FLOAT {
} else if *pT == parquet.Type_FLOAT {
v := float32(0)
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_DOUBLE {
} else if *pT == parquet.Type_DOUBLE {
v := float64(0)
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_BYTE_ARRAY {
} else if *pT == parquet.Type_BYTE_ARRAY {
v := ""
return reflect.TypeOf(&v)

}else if *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
} else if *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
v := ""
return reflect.TypeOf(&v)

}else {
} else {
return nil
}
}
Expand Down Expand Up @@ -265,12 +265,12 @@ func StrToParquetType(s string, pT *parquet.Type, cT *parquet.ConvertedType, len
return int64(tmp)

} else if *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
s = num.String()
s = num.Text('f', 0)
res := StrIntToBinary(s, "BigEndian", length, true)
return res

} else {
s = num.String()
s = num.Text('f', 0)
res := StrIntToBinary(s, "BigEndian", 0, true)
return res
}
Expand Down
62 changes: 33 additions & 29 deletions types/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,42 +15,46 @@ func TestStrToParquetType(t *testing.T) {
GoData interface{}
PT *parquet.Type
CT *parquet.ConvertedType
Length int
Scale int
}{
{"false", bool(false), parquet.TypePtr(parquet.Type_BOOLEAN), nil},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), nil},
{"0", int64(0), parquet.TypePtr(parquet.Type_INT64), nil},
{"12345", StrIntToBinary("12345", "LittleEndian", 12, true), parquet.TypePtr(parquet.Type_INT96), nil},
{"0.1", float32(0.1), parquet.TypePtr(parquet.Type_FLOAT), nil},
{"0.1", float64(0.1), parquet.TypePtr(parquet.Type_DOUBLE), nil},
{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_BYTE_ARRAY), nil},
{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), nil},
{"false", bool(false), parquet.TypePtr(parquet.Type_BOOLEAN), nil, 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), nil, 0, 0},
{"0", int64(0), parquet.TypePtr(parquet.Type_INT64), nil, 0, 0},
{"12345", StrIntToBinary("12345", "LittleEndian", 12, true), parquet.TypePtr(parquet.Type_INT96), nil, 0, 0},
{"0.1", float32(0.1), parquet.TypePtr(parquet.Type_FLOAT), nil, 0, 0},
{"0.1", float64(0.1), parquet.TypePtr(parquet.Type_DOUBLE), nil, 0, 0},
{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_BYTE_ARRAY), nil, 0, 0},
{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), nil, 0, 0},

{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8)},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8)},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16)},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_32)},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_64)},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8)},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_16)},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32)},
{"1", uint64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_64)},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DATE)},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MILLIS)},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MICROS)},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MICROS)},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MILLIS)},
{"123456789", StrIntToBinary("123456789", "LittleEndian", 12, false), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_INTERVAL)},
{"123.45", int32(12345), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
{"123.45", int64(12345), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
{"123.45", StrIntToBinary("12345", "BigEndian", 12, true), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
{"123.45", StrIntToBinary("12345", "BigEndian", 0, true), parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL)},
{"abc bcd", string("abc bcd"), parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8), 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_8), 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_16), 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_32), 0, 0},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_INT_64), 0, 0},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_8), 0, 0},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_16), 0, 0},
{"1", uint32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32), 0, 0},
{"1", uint64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_64), 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DATE), 0, 0},
{"1", int32(1), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MILLIS), 0, 0},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIME_MICROS), 0, 0},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MICROS), 0, 0},
{"1", int64(1), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_TIMESTAMP_MILLIS), 0, 0},
{"123456789", StrIntToBinary("123456789", "LittleEndian", 12, false), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_INTERVAL), 0, 0},
{"123.45", int32(12345), parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 12, 2},
{"123.45", int64(12345), parquet.TypePtr(parquet.Type_INT64), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 12, 2},
{"123.45", StrIntToBinary("12345", "BigEndian", 12, true), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 12, 2},
{"373.1145", StrIntToBinary("373114500000000000000", "BigEndian", 16, true), parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 16, 18},
{"123.45", StrIntToBinary("12345", "BigEndian", 0, true), parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 12, 2},
{"373.1145", StrIntToBinary("373114500000000000000", "BigEndian", 0, true), parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_DECIMAL), 16, 18},
}

for _, data := range testData {
res := fmt.Sprintf("%v", StrToParquetType(data.StrData, data.PT, data.CT, 12, 2))
res := fmt.Sprintf("%v", StrToParquetType(data.StrData, data.PT, data.CT, data.Length, data.Scale))
expect := fmt.Sprintf("%v", data.GoData)
if res != expect {
t.Errorf("StrToParquetType err %v-%v, expect %v, get %v", data.PT, data.CT, expect, res)
t.Errorf("StrToParquetType err %v-%v, expect %s, got %s", data.PT, data.CT, expect, res)
}
}
}
Expand Down

0 comments on commit 7fc2d91

Please sign in to comment.