From 21e80a3aafec58f4538de7f8af2b24ffd936e551 Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Thu, 26 Jan 2023 16:02:21 -0500 Subject: [PATCH] service: add csv.delim query param to load API (#4333) The csv.delim query parameter to the load API (POST /pool/{pool}/branch/{branch}) specifies the field delimiter for CSV input, allowing use of a character other than ",". For #4238. --- docs/lake/api.md | 1 + service/handlers.go | 10 ++++++++++ service/ztests/curl-load-csv.yaml | 29 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 service/ztests/curl-load-csv.yaml diff --git a/docs/lake/api.md b/docs/lake/api.md index 5df2bb2927..4cafce241a 100644 --- a/docs/lake/api.md +++ b/docs/lake/api.md @@ -143,6 +143,7 @@ POST /pool/{pool}/branch/{branch} | branch | string | path | **Required.** Name of branch to which data will be loaded. | | | various | body | **Required.** Contents of the posted data. | | Content-Type | string | header | MIME type of the posted content. If undefined, the service will attempt to introspect the data and determine type automatically. | +| csv.delim | string | query | Exactly one character specifying the field delimiter for CSV data. Defaults to ",". 
| **Example Request** diff --git a/service/handlers.go b/service/handlers.go index d10a859c9b..b2abbeb607 100644 --- a/service/handlers.go +++ b/service/handlers.go @@ -24,6 +24,7 @@ import ( "github.com/brimdata/zed/service/srverr" "github.com/brimdata/zed/zio" "github.com/brimdata/zed/zio/anyio" + "github.com/brimdata/zed/zio/csvio" "github.com/brimdata/zed/zio/zngio" "github.com/segmentio/ksuid" ) @@ -333,6 +334,14 @@ func handleBranchLoad(c *Core, w *ResponseWriter, r *Request) { if !ok { return } + var csvDelim rune + if s := r.URL.Query().Get("csv.delim"); s != "" { + if len(s) != 1 { + w.Error(srverr.ErrInvalid(`invalid query param "csv.delim": must be exactly one character`)) + return + } + csvDelim = rune(s[0]) + } message, ok := r.decodeCommitMessage(w) if !ok { return @@ -375,6 +384,7 @@ func handleBranchLoad(c *Core, w *ResponseWriter, r *Request) { } opts := anyio.ReaderOpts{ Format: format, + CSV: csvio.ReaderOpts{Delim: csvDelim}, // Force validation of ZNG when loading into the lake. ZNG: zngio.ReaderOpts{Validate: true}, } diff --git a/service/ztests/curl-load-csv.yaml b/service/ztests/curl-load-csv.yaml new file mode 100644 index 0000000000..4cb8db4344 --- /dev/null +++ b/service/ztests/curl-load-csv.yaml @@ -0,0 +1,29 @@ +script: | + source service.sh + zed create -q test + curl -H Content-Type:text/csv --data-binary @in.csv \ + --fail $ZED_LAKE/pool/test/branch/main | zq -z commit:=0 - + curl -H Content-Type:text/csv --data-binary @in-dot.csv \ + --fail $ZED_LAKE/pool/test/branch/main?csv.delim=. | zq -z commit:=0 - + echo // + zed query -z 'from test' + +inputs: + - name: in.csv + data: | + a,b + 1,2 + - name: in-dot.csv + data: | + a.b + 3.4 + - name: service.sh + +outputs: + - name: stdout + data: | + {commit:0,warnings:[]([string])} + {commit:0,warnings:[]([string])} + // + {a:1.,b:2.} + {a:3.,b:4.}