Skip to content

Commit

Permalink
WIP.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 committed Nov 21, 2024
1 parent 4a7b710 commit d3990d3
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 12 deletions.
15 changes: 5 additions & 10 deletions clients/s3/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,8 @@ func (s *Store) Merge(ctx context.Context, tableData *optimization.TableData) er
return nil
}

var parquetColumns []parquetutil.ParquetColumn
for _, col := range tableData.ReadOnlyInMemoryCols().ValidColumns() {
parquetColumns = append(parquetColumns, parquetutil.NewParquetColumn(col.Name(), col))
}

schema, err := parquetutil.GenerateCSVSchema(parquetColumns)
cols := tableData.ReadOnlyInMemoryCols().ValidColumns()
schema, err := parquetutil.GenerateCSVSchema(cols)
if err != nil {
return fmt.Errorf("failed to generate parquet schema: %w", err)
}
Expand All @@ -104,13 +100,12 @@ func (s *Store) Merge(ctx context.Context, tableData *optimization.TableData) er
pw.CompressionType = parquet.CompressionCodec_GZIP
for _, val := range tableData.Rows() {
var row []any
for _, parquetCol := range parquetColumns {
value, err := parquetutil.ParseValue(val[parquetCol.OriginalName()], parquetCol.Column())
for _, col := range cols {
value, err := parquetutil.ParseValue(val[col.Name()], col)
if err != nil {
return fmt.Errorf("failed to parse value, err: %w, value: %v, column: %q", err, val[parquetCol.OriginalName()], parquetCol.CleanedName())
return fmt.Errorf("failed to parse value, err: %w, value: %v, column: %q", err, val[col.Name()], col.Name())
}

fmt.Println("###", parquetCol.OriginalName(), "value", value)
row = append(row, value)
}

Expand Down
4 changes: 2 additions & 2 deletions lib/parquetutil/generate_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ func GenerateJSONSchema(columns []ParquetColumn) (string, error) {
return string(schemaBytes), nil
}

func GenerateCSVSchema(columns []ParquetColumn) ([]string, error) {
func GenerateCSVSchema(columns []columns.Column) ([]string, error) {
var fields []string
for _, column := range columns {
// We don't need to escape the column name here.
field, err := column.column.KindDetails.ParquetAnnotation(column.cleanedName)
field, err := column.KindDetails.ParquetAnnotation(column.Name())
if err != nil {
return nil, err
}
Expand Down

0 comments on commit d3990d3

Please sign in to comment.