Skip to content

Commit

Permalink
improve zio/csvio.Writer output (#3129)
Browse files Browse the repository at this point in the history
* Change bool output from F and T to false and true.
* Change bytes output from Base64 to hexadecimal with 0x prefix.
* Change duration output from floating-point seconds to ZSON duration format.
* Change float64 output from %f to %g.
* Change type output from the raw encoded value bytes (which was broken) to ZSON format.
  • Loading branch information
nwt authored Sep 30, 2021
1 parent fe1cb75 commit 547a203
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 32 deletions.
8 changes: 4 additions & 4 deletions docs/language/operators/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,10 @@ zq -f csv 'ts < 1521911721 | fuse' stats.log.gz weird.log.gz
```mdtest-output
_path,ts,peer,mem,pkts_proc,bytes_recv,pkts_dropped,pkts_link,pkt_lag,events_proc,events_queued,active_tcp_conns,active_udp_conns,active_icmp_conns,tcp_conns,udp_conns,icmp_conns,timers,active_timers,files,active_files,dns_requests,active_dns_requests,reassem_tcp_size,reassem_file_size,reassem_frag_size,reassem_unknown_size,uid,id.orig_h,id.orig_p,id.resp_h,id.resp_p,name,addl,notice
stats,2018-03-24T17:15:20.600725Z,zeek,74,26,29375,,,,404,11,1,0,0,1,0,0,36,32,0,0,0,0,1528,0,0,0,,,,,,,,
weird,2018-03-24T17:15:20.600843Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,C1zOivgBT6dBmknqk,10.47.1.152,49562,23.217.103.245,80,TCP_ack_underflow_or_misorder,,F
weird,2018-03-24T17:15:20.608108Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,truncated_header,,F
weird,2018-03-24T17:15:20.610033Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,C45Ff03lESjMQQQej1,10.47.5.155,40712,91.189.91.23,80,above_hole_data_without_any_acks,,F
weird,2018-03-24T17:15:20.742818Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,Cs7J9j2xFQcazrg7Nc,10.47.8.100,5900,10.129.53.65,58485,connection_originator_SYN_ack,,F
weird,2018-03-24T17:15:20.600843Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,C1zOivgBT6dBmknqk,10.47.1.152,49562,23.217.103.245,80,TCP_ack_underflow_or_misorder,,false
weird,2018-03-24T17:15:20.608108Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,truncated_header,,false
weird,2018-03-24T17:15:20.610033Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,C45Ff03lESjMQQQej1,10.47.5.155,40712,91.189.91.23,80,above_hole_data_without_any_acks,,false
weird,2018-03-24T17:15:20.742818Z,zeek,,,,,,,,,,,,,,,,,,,,,,,,,Cs7J9j2xFQcazrg7Nc,10.47.8.100,5900,10.129.53.65,58485,connection_originator_SYN_ack,,false
```

Other output formats invoked via `zq -f` that benefit greatly from the use of
Expand Down
2 changes: 1 addition & 1 deletion zio/anyio/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func NewWriter(w io.WriteCloser, opts WriterOpts) (zio.WriteCloser, error) {
case "table":
return tableio.NewWriter(w, opts.UTF8), nil
case "csv":
return csvio.NewWriter(w, csvio.WriterOpts{UTF8: opts.UTF8}), nil
return csvio.NewWriter(w), nil
case "parquet":
return parquetio.NewWriter(w), nil
case "lake":
Expand Down
33 changes: 15 additions & 18 deletions zio/csvio/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ import (
"encoding/csv"
"errors"
"io"
"time"
"strconv"

"github.com/brimdata/zed"
"github.com/brimdata/zed/expr"
"github.com/brimdata/zed/zio/tzngio"
)

var ErrNotDataFrame = errors.New("CSV output requires uniform records but multiple types encountered (consider 'fuse')")
Expand All @@ -17,24 +16,18 @@ type Writer struct {
writer io.WriteCloser
encoder *csv.Writer
flattener *expr.Flattener
format tzngio.OutFmt
first *zed.TypeRecord
}

type WriterOpts struct {
UTF8 bool
}

func NewWriter(w io.WriteCloser, opts WriterOpts) *Writer {
format := tzngio.OutFormatZeekAscii
if opts.UTF8 {
format = tzngio.OutFormatZeek
}
func NewWriter(w io.WriteCloser) *Writer {
return &Writer{
writer: w,
encoder: csv.NewWriter(w),
flattener: expr.NewFlattener(zed.NewContext()),
format: format,
}
}

Expand Down Expand Up @@ -67,24 +60,28 @@ func (w *Writer) Write(rec *zed.Record) error {
}
var out []string
for k, col := range rec.Columns() {
var v string
var s string
// O(n^2)
value := rec.ValueByColumn(k)
if !value.IsUnsetOrNil() {
switch col.Type.ID() {
case zed.IDTime:
ts, err := zed.DecodeTime(value.Bytes)
id := col.Type.ID()
switch {
case zed.IsStringy(id):
s = string(value.Bytes)
case zed.IsFloat(id):
v, err := zed.DecodeFloat64(value.Bytes)
if err != nil {
return err
}
v = ts.Time().UTC().Format(time.RFC3339Nano)
case zed.IDString, zed.IDBstring, zed.IDType, zed.IDError:
v = string(value.Bytes)
s = strconv.FormatFloat(v, 'g', -1, 64)
case id == zed.IDBytes && len(value.Bytes) == 0:
// We want "" instead of "0x" from
// value.Type.Format.
default:
v = tzngio.FormatValue(value, w.format)
s = value.Type.Format(value.Bytes)
}
}
out = append(out, v)
out = append(out, s)
}
return w.encoder.Write(out)
}
4 changes: 2 additions & 2 deletions zio/csvio/ztests/bool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ output-flags: -f csv
output: |
bool
F
T
false
true
2 changes: 1 addition & 1 deletion zio/csvio/ztests/bytes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ output: |
bytes
aGVsbG8K
0x68656c6c6f0a
6 changes: 3 additions & 3 deletions zio/csvio/ztests/duration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ output-flags: -f csv
output: |
duration
0
-9223372036.854775808
9223372036.854775807
0s
-292y171d23h47m16.854775808s
292y171d23h47m16.854775807s
6 changes: 4 additions & 2 deletions zio/csvio/ztests/float.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ zed: '*'
input: |
{float64:null(float64)}
{float64:0.}
{float64:1.1}
{float64:1.7976931348623157e+308}
{float64:5e-324}
{float64:-Inf}
Expand All @@ -17,8 +18,9 @@ output: |
float64
0
179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005
1.1
1.7976931348623157e+308
5e-324
-Inf
+Inf
NaN
4 changes: 3 additions & 1 deletion zio/csvio/ztests/type.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ zed: '*'

input: |
{type:null(type)}
{type:({a:int64})}
output-flags: -f csv

output: |+
output: |
type
({a:int64})

0 comments on commit 547a203

Please sign in to comment.