diff --git a/README.md b/README.md index 5aa92cc1..f45451ff 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,12 @@ There are two types in Parquet: Primitive Type and Logical Type. Logical types a |BYTE_ARRAY|BYTE_ARRAY|string| |FIXED_LEN_BYTE_ARRAY|FIXED_LEN_BYTE_ARRAY|string| |UTF8|BYTE_ARRAY|string| -|INT_8|INT32|int32| -|INT_16|INT32|int32| +|INT_8|INT32|int8| +|INT_16|INT32|int16| |INT_32|INT32|int32| |INT_64|INT64|int64| -|UINT_8|INT32|uint32| -|UINT_16|INT32|uint32| +|UINT_8|INT32|uint8| +|UINT_16|INT32|uint16| |UINT_32|INT32|uint32| |UINT_64|INT64|uint64| |DATE|INT32|int32| diff --git a/example/column_read.go b/example/column_read.go index 6d218791..47071d89 100644 --- a/example/column_read.go +++ b/example/column_read.go @@ -34,8 +34,8 @@ func main() { log.Println("Can't create parquet writer") return } - num := 10 - for i := 0; i < num; i++ { + num := int64(10) + for i := 0; int64(i) < num; i++ { stu := Student{ Name: "StudentName", Age: int32(20 + i%5), @@ -70,7 +70,7 @@ func main() { log.Println("Can't create column reader", err) return } - num = int(pr.GetNumRows()) + num = int64(pr.GetNumRows()) pr.SkipRowsByPath("parquet_go_root.name", 5) //skip the first five rows names, rls, dls, err = pr.ReadColumnByPath("parquet_go_root.name", num) diff --git a/example/type.go b/example/type.go index 0acef95f..7a09904f 100644 --- a/example/type.go +++ b/example/type.go @@ -20,12 +20,12 @@ type TypeList struct { FixedLenByteArray string `parquet:"name=FixedLenByteArray, type=FIXED_LEN_BYTE_ARRAY, length=10"` Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"` - Int_8 int32 `parquet:"name=int_8, type=INT_8"` - Int_16 int32 `parquet:"name=int_16, type=INT_16"` + Int_8 int8 `parquet:"name=int_8, type=INT_8"` + Int_16 int16 `parquet:"name=int_16, type=INT_16"` Int_32 int32 `parquet:"name=int_32, type=INT_32"` Int_64 int64 `parquet:"name=int_64, type=INT_64"` - Uint_8 uint32 `parquet:"name=uint_8, type=UINT_8"` - Uint_16 uint32 `parquet:"name=uint_16, type=UINT_16"` + Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"` + Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"` Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"` Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"` Date int32 `parquet:"name=date, type=DATE"` @@ -71,12 +71,12 @@ func main() { FixedLenByteArray: "HelloWorld", Utf8: "utf8", - Int_8: int32(i), - Int_16: int32(i), + Int_8: int8(i), + Int_16: int16(i), Int_32: int32(i), Int_64: int64(i), - Uint_8: uint32(i), - Uint_16: uint32(i), + Uint_8: uint8(i), + Uint_16: uint16(i), Uint_32: uint32(i), Uint_64: uint64(i), Date: int32(i), diff --git a/schema/gettype.go b/schema/gettype.go index 14d2991f..45073493 100644 --- a/schema/gettype.go +++ b/schema/gettype.go @@ -41,10 +41,10 @@ func (self *SchemaHandler) GetTypes() []reflect.Type { if nc == 0 { if *rT != parquet.FieldRepetitionType_REPEATED { - elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, rT) + elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, cT, rT) } else { - elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, nil)) + elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, cT, nil)) } } else { diff --git a/types/types.go b/types/types.go index b2310c57..ecb8d128 100644 --- a/types/types.go +++ b/types/types.go @@ -69,14 +69,44 @@ func TypeNameToParquetType(name string, baseName string) (*parquet.Type, *parque panic(fmt.Errorf("Unknown data type: '%s'", name)) } -func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionType) reflect.Type { +func ParquetTypeToGoReflectType(pT *parquet.Type, cT *parquet.ConvertedType, rT *parquet.FieldRepetitionType) reflect.Type { if rT == nil || *rT != parquet.FieldRepetitionType_OPTIONAL { if *pT == parquet.Type_BOOLEAN { return reflect.TypeOf(true) + } else if *pT == parquet.Type_INT32 && cT == nil { + return reflect.TypeOf(int32(0)) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 { + return reflect.TypeOf(int8(0)) + + }else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 { + return reflect.TypeOf(int16(0)) + + }else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 { + return reflect.TypeOf(int32(0)) + + }else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 { + return reflect.TypeOf(uint8(0)) + + }else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 { + return reflect.TypeOf(uint16(0)) + + }else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 { + return reflect.TypeOf(uint32(0)) + } else if *pT == parquet.Type_INT32 { return reflect.TypeOf(int32(0)) + }else if *pT == parquet.Type_INT64 && cT == nil { + return reflect.TypeOf(int64(0)) + + }else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 { + return reflect.TypeOf(int64(0)) + + }else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 { + return reflect.TypeOf(uint64(0)) + } else if *pT == parquet.Type_INT64 { return reflect.TypeOf(int64(0)) @@ -104,10 +134,50 @@ func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionTyp v := true return reflect.TypeOf(&v) + } else if *pT == parquet.Type_INT32 && cT == nil{ + v := int32(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 { + v := int8(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 { + v := int16(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 { + v := int32(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 { + v := uint8(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 { + v := uint16(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 { + v := uint32(0) + return reflect.TypeOf(&v) + } else if *pT == parquet.Type_INT32 { v := int32(0) return reflect.TypeOf(&v) + } else if *pT == parquet.Type_INT64 && cT == nil { + v := int64(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 { + v := int64(0) + return reflect.TypeOf(&v) + + } else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 { + v := uint64(0) + return reflect.TypeOf(&v) + } else if *pT == parquet.Type_INT64 { v := int64(0) return reflect.TypeOf(&v) @@ -146,10 +216,14 @@ func ParquetTypeToGoType(src interface{}, pT *parquet.Type, cT *parquet.Converte return src } - if *cT == parquet.ConvertedType_UINT_8 { - return uint32(src.(int32)) + if *cT == parquet.ConvertedType_INT_8 { + return int8(src.(int32)) + } else if *cT == parquet.ConvertedType_INT_16 { + return int16(src.(int32)) + } else if *cT == parquet.ConvertedType_UINT_8 { + return uint8(src.(int32)) } else if *cT == parquet.ConvertedType_UINT_16 { - return uint32(src.(int32)) + return uint16(src.(int32)) } else if *cT == parquet.ConvertedType_UINT_32 { return uint32(src.(int32)) } else if *cT == parquet.ConvertedType_UINT_64 { @@ -164,10 +238,14 @@ func GoTypeToParquetType(src interface{}, pT *parquet.Type, cT *parquet.Converte return src } - if *cT == parquet.ConvertedType_UINT_8 { - return int32(src.(uint32)) + if *cT == parquet.ConvertedType_INT_8 { + return int32(src.(int8)) + }else if *cT == parquet.ConvertedType_INT_16 { + return int32(src.(int16)) + } else if *cT == parquet.ConvertedType_UINT_8 { + return int32(src.(uint8)) } else if *cT == parquet.ConvertedType_UINT_16 { - return int32(src.(uint32)) + return int32(src.(uint16)) } else if *cT == parquet.ConvertedType_UINT_32 { return int32(src.(uint32)) } else if *cT == parquet.ConvertedType_UINT_64 { @@ -221,16 +299,40 @@ func StrToParquetType(s string, pT *parquet.Type, cT *parquet.ConvertedType, len if *cT == parquet.ConvertedType_UTF8 { return s - } else if *cT == parquet.ConvertedType_INT_8 || *cT == parquet.ConvertedType_INT_16 || *cT == parquet.ConvertedType_INT_32 || - *cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS { + } else if *cT == parquet.ConvertedType_INT_8 { + var v int8 + fmt.Sscanf(s, "%d", &v) + return int32(v) + + } else if *cT == parquet.ConvertedType_INT_16 { + var v int16 + fmt.Sscanf(s, "%d", &v) + return int32(v) + + } else if *cT == parquet.ConvertedType_INT_32 { var v int32 fmt.Sscanf(s, "%d", &v) - return v + return int32(v) - } else if *cT == parquet.ConvertedType_UINT_8 || *cT == parquet.ConvertedType_UINT_16 || *cT == parquet.ConvertedType_UINT_32 { - var vt uint32 - fmt.Sscanf(s, "%d", &vt) - return int32(vt) + } else if *cT == parquet.ConvertedType_UINT_8 { + var v uint8 + fmt.Sscanf(s, "%d", &v) + return int32(v) + + } else if *cT == parquet.ConvertedType_UINT_16 { + var v uint16 + fmt.Sscanf(s, "%d", &v) + return int32(v) + + } else if *cT == parquet.ConvertedType_UINT_32 { + var v uint32 + fmt.Sscanf(s, "%d", &v) + return int32(v) + + } else if *cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS { + var v int32 + fmt.Sscanf(s, "%d", &v) + return int32(v) } else if *cT == parquet.ConvertedType_UINT_64 { var vt uint64