Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): JSON and CBOR response formats (IPIP-328) #9335

Merged
merged 39 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
ac8f9b1
wip: play with dag-cbor and dag-json
hacdias Oct 6, 2022
3dce012
wip: add application/json and application/cbor
hacdias Oct 7, 2022
b557181
fix: go cases don't flow automatically :)
hacdias Oct 10, 2022
4104bb7
test: add some dag-json and dag-cbor tests
hacdias Oct 10, 2022
25893c5
test: improve names
hacdias Oct 10, 2022
e621e64
feat: allow json and cbor data types too
hacdias Oct 11, 2022
44946ed
refactor: avoid encoding things that are already on their right encoding
hacdias Oct 13, 2022
199ab42
fix: remove responseFormat from logging
hacdias Oct 13, 2022
89eb033
refactor: simplify serveCodec to use serveRawBlock iff data encoded i…
hacdias Oct 13, 2022
fb50869
tests: rename current tests to indicate they're unixfs only
hacdias Oct 13, 2022
fc31241
refactor: do not use serveRawBlock inside serveCodec bc headers and o…
hacdias Oct 13, 2022
55383cd
test: add test with pure json and cbor
hacdias Oct 17, 2022
a6d45c7
test: convert cbor <-> json
hacdias Oct 17, 2022
1986be1
test: path traversal and dag-pb output
hacdias Oct 19, 2022
9ef022e
fix: add more info about errors
hacdias Oct 20, 2022
cadc681
fix: add missing traversal
hacdias Oct 20, 2022
6331695
Merge branch 'master' into feat/8823
hacdias Nov 10, 2022
2c93672
fix: remove duplicate variable
hacdias Nov 10, 2022
462c71b
Merge branch 'master' into feat/8823
hacdias Nov 11, 2022
1e844c5
refactor: do not support traversal
hacdias Nov 11, 2022
bb98041
Update core/corehttp/gateway_handler_codec.go
hacdias Nov 15, 2022
53d5878
improve PR to match spec
hacdias Nov 16, 2022
4064f97
Merge branch 'master' into feat/8823
hacdias Nov 16, 2022
8c6a8da
feat: little web page
hacdias Nov 16, 2022
b5e5ff2
feat: update doc
hacdias Nov 16, 2022
8ca2a52
fix: Content-Disposition .json and .cbor
lidel Nov 23, 2022
b4dfa66
fix: inline disposition for JSON responses
lidel Nov 23, 2022
83913c7
refactor: return 501 for unsupported pathing
lidel Nov 23, 2022
ff55745
docs(cbor): improved info about codec
lidel Nov 23, 2022
52711d3
refactor: create template at assets/dag-index-html
lidel Nov 24, 2022
7e84856
fix(dag@gw): content type and cache headers
lidel Nov 25, 2022
3b89f20
Merge branch 'master' into feat/8823
hacdias Nov 28, 2022
162f435
add changelog info
hacdias Nov 28, 2022
12d0d7f
fix title
hacdias Nov 28, 2022
2d8ba78
rm wild block
hacdias Nov 30, 2022
b5874e7
Merge branch 'master' into feat/8823
hacdias Dec 5, 2022
0c08a76
Merge branch 'master' into feat/8823
hacdias Dec 5, 2022
f084f09
fix(dag-index-html): remove technical jargon
lidel Dec 5, 2022
32bcd41
Merge branch 'master' into feat/8823
lidel Dec 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
13 changes: 10 additions & 3 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
coreiface "github.com/ipfs/interface-go-ipfs-core"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
routing "github.com/libp2p/go-libp2p/core/routing"
mc "github.com/multiformats/go-multicodec"
prometheus "github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -417,9 +418,15 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "": // The implicit response format is UnixFS
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
case "":
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
default:
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
}
return
case "application/vnd.ipld.raw":
logger.Debugw("serving raw block", "path", contentPath)
Expand Down
148 changes: 108 additions & 40 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ import (
"go.opentelemetry.io/otel/trace"
)

// codecToContentType gives, for each IPLD codec this handler can serve, the
// value to use for the HTTP Content-Type response header. Keys are multicodec
// codes widened to uint64 so they compare directly against a CID prefix codec.
var codecToContentType = map[uint64]string{
	// dag-* codecs use the vendor-specific IPLD media types.
	uint64(mc.DagJson): "application/vnd.ipld.dag-json",
	uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",

	// Plain JSON and CBOR use the generic media types.
	uint64(mc.Json): "application/json",
	uint64(mc.Cbor): "application/cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
Expand All @@ -31,62 +40,102 @@ var contentTypeToCodecs = map[string][]uint64{
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
}

func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, contentType string) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("contentType", contentType)))
func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

codecs, ok := contentTypeToCodecs[contentType]
// If the resolved path still has some remainder, return bad request.
if resolvedPath.Remainder() != "" {
path := strings.TrimSuffix(resolvedPath.String(), resolvedPath.Remainder())
err := fmt.Errorf("%s could not be fully resolved, try %s instead", resolvedPath.String(), path)
webError(w, "path has remainder", err, http.StatusBadRequest)
return
}
lidel marked this conversation as resolved.
Show resolved Hide resolved

// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
cidCodec := resolvedPath.Cid().Prefix().Codec
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")

if isDAG && acceptsHTML {
i.serverCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
cidContentType, ok := codecToContentType[cidCodec]
if !ok {
// Should not happen unless function is called with wrong parameters.
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}

i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, cidContentType)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
if !ok {
// This is never supposed to happen unless function is called with wrong parameters.
err := fmt.Errorf("unsupported content type: %s", contentType)
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// We do not support paths for non-UnixFS codecs. If we have a path, return 404.
path := strings.TrimSuffix(resolvedPath.String(), "/")
if strings.Count(path, "/") > 2 {
err := fmt.Errorf("%s not found", resolvedPath.String())
webError(w, err.Error(), err, http.StatusNotFound)
// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, requestedContentType, codecs[len(codecs)-1])
return
}

// Set Cache-Control and read optional Last-Modified time
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
name := addContentDispositionHeader(w, r, contentPath)
w.Header().Set("Content-Type", contentType)
w.Header().Set("X-Content-Type-Options", "nosniff")

// If the data is already encoded with supported and compatible codec, we can just stream the raw
// data. serveRawBlock cannot be directly used here as it sets different headers.
// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
blockCid := resolvedPath.Cid()
blockReader, err := i.api.Block().Get(ctx, resolvedPath)
if err != nil {
webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
return
}
block, err := io.ReadAll(blockReader)
if err != nil {
webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
return
}
content := bytes.NewReader(block)

// ServeContent will take care of
// If-None-Match+Etag, Content-Length and range requests
_, _, _ = ServeContent(w, r, name, modtime, content)
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, requestedContentType)
return
}
}

// Sets correct Last-Modified header. This code is borrowed from the standard
// library (net/http/server.go) as we cannot use serveFile.
if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
// Finally, if nothing of the above is true, we have to actually convert the codec.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, requestedContentType, codecs[len(codecs)-1])
}

// serverCodecHTML is the stub for the HTML view of a DAG-JSON or DAG-CBOR
// document. For now it only writes the literal body "TODO"; ctx, r,
// resolvedPath and contentPath are accepted but not used yet.
func (i *gatewayHandler) serverCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
	placeholder := []byte("TODO")

	// The write result is deliberately discarded: this stub has no fallback
	// response to send if the write fails.
	_, _ = w.Write(placeholder)
}

// serveCodecRaw sends the block behind resolvedPath to the client exactly as
// stored (no re-encoding), labelling the response with contentType.
//
// The block is fetched and fully read BEFORE any success-only header is set.
// Setting Cache-Control, Content-Disposition and Content-Type first would
// leave them on the response when the fetch fails and webError reports a 500.
// serveCodecConverted follows the same order: produce the payload, then set
// the headers.
//
// Parameters:
//   - ctx: request-scoped context passed to the block API.
//   - w, r: the HTTP response writer and request being served.
//   - resolvedPath: the resolved path whose CID identifies the block.
//   - contentPath: the path as requested, passed to the cache-control and
//     content-disposition helpers.
//   - contentType: value written to the Content-Type response header.
//
// On a fetch or read failure, a 500 is reported through webError and nothing
// else is written.
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string) {
	blockCid := resolvedPath.Cid()

	// Fallible work first: nothing has been written to w's headers yet.
	blockReader, err := i.api.Block().Get(ctx, resolvedPath)
	if err != nil {
		webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
		return
	}
	block, err := io.ReadAll(blockReader)
	if err != nil {
		webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
		return
	}

	// The payload is in memory, so the response can now be described.
	// Set Cache-Control and read optional Last-Modified time.
	modtime := addCacheControlHeaders(w, r, contentPath, blockCid)
	name := addContentDispositionHeader(w, r, contentPath)
	w.Header().Set("Content-Type", contentType)
	w.Header().Set("X-Content-Type-Options", "nosniff")

	// ServeContent will take care of
	// If-None-Match+Etag, Content-Length and range requests.
	_, _, _ = ServeContent(w, r, name, modtime, bytes.NewReader(block))
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string, codec uint64) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
Expand All @@ -101,12 +150,31 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
}
finalNode := universal.(ipld.Node)

// Otherwise convert it using the last codec of the list.
encoder, err := multicodec.LookupEncoder(codecs[len(codecs)-1])
encoder, err := multicodec.LookupEncoder(codec)
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

// Keep it in memory so we can detect encoding errors in order to conform
// to the specification.
var buf bytes.Buffer
err = encoder(finalNode, &buf)
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
}

_ = encoder(finalNode, w)
// Set Cache-Control and read optional Last-Modified time
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
w.Header().Set("Content-Type", contentType)
w.Header().Set("X-Content-Type-Options", "nosniff")

// Sets correct Last-Modified header. This code is borrowed from the standard
// library (net/http/server.go) as we cannot use serveFile.
if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
}

w.Write(buf.Bytes())
}
72 changes: 45 additions & 27 deletions test/sharness/t0123-gateway-json-cbor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,25 @@ test_headers () {
name=$1
format=$2

test_expect_success "GET $name with format=dag-$format has expected Content-Type" '
test_expect_success "GET UnixFS as $name with format=dag-$format has expected Content-Type" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=dag-$format" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output
'

test_expect_success "GET $name with 'Accept: application/vnd.ipld.dag-$format' has expected Content-Type" '
test_expect_success "GET UnixFS as $name with 'Accept: application/vnd.ipld.dag-$format' has expected Content-Type" '
curl -sD - -H "Accept: application/vnd.ipld.dag-$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output
'

test_expect_success "GET $name with format=$format has expected Content-Type" '
test_expect_success "GET UnixFS as $name with format=$format has expected Content-Type" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET $name with 'Accept: application/$format' has expected Content-Type" '
test_expect_success "GET UnixFS as $name with 'Accept: application/$format' has expected Content-Type" '
curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
Expand All @@ -58,19 +58,19 @@ test_dag_pb () {
name=$1
format=$2

test_expect_success "GET DAG-PB $name has expected output for file" '
test_expect_success "GET UnixFS as $name has expected output for file" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=dag-$format" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-$format $FILE_CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET DAG-PB $name has expected output for directory" '
test_expect_success "GET UnixFS as $name has expected output for directory" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET DAG-PB $name with format=dag-$format and format=$format produce same output" '
test_expect_success "GET UnixFS as $name with format=dag-$format and format=$format produce same output" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output_1 2>&1 &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=$format" > curl_output_2 2>&1 &&
test_cmp curl_output_1 curl_output_2
Expand All @@ -84,6 +84,19 @@ test_cmp_dag_get () {
name=$1
format=$2

test_expect_success "GET $name without Accept or format= has expected Content-Type" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) &&
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/$format" curl_output
'

test_expect_success "GET $name without Accept or format= produces correct output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 &&
ipfs dag get --output-codec $format $CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET $name with format=$format produces correct output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" > curl_output 2>&1 &&
Expand Down Expand Up @@ -116,8 +129,8 @@ test_expect_success "GET CBOR as JSON produces DAG-JSON output" '
test_cmp ipfs_dag_get_output curl_output
'

DAG_CBOR_TRAVERSAL_CID="bafyreiehxu373cu3v5gyxyxfsfjryscs7sq6fh3unqcqgqhdfn3n43vrgu"
DAG_JSON_TRAVERSAL_CID="baguqeeraoaeabj5hdfcmpkzfeiwtfwb3qbvfwzbiknqn7itcwsb2fdtu7eta"
DAG_CBOR_TRAVERSAL_CID="bafyreibs4utpgbn7uqegmd2goqz4bkyflre2ek2iwv743fhvylwi4zeeim"
DAG_JSON_TRAVERSAL_CID="baguqeeram5ujjqrwheyaty3w5gdsmoz6vittchvhk723jjqxk7hakxkd47xq"
DAG_PB_CID="bafybeiegxwlgmoh2cny7qlolykdf7aq7g6dlommarldrbm7c4hbckhfcke"

test_expect_success "Add CARs for path traversal and DAG-PB representation tests" '
Expand All @@ -129,32 +142,37 @@ test_expect_success "Add CARs for path traversal and DAG-PB representation tests
test_should_contain $DAG_PB_CID import_output
'

test_expect_success "GET JSON traversal returns 404" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo/bar?format=json" > curl_output 2>&1 &&
test_should_contain "404 Not Found" curl_output
'

test_expect_success "GET CBOR traversal returns 404" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo/bar?format=cbor" > curl_output 2>&1 &&
test_should_contain "404 Not Found" curl_output
test_expect_success "GET DAG-JSON traversal returns 400 if there is path remainder" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_JSON_TRAVERSAL_CID/foo?format=dag-json" > curl_output 2>&1 &&
test_should_contain "400 Bad Request" curl_output
'

test_expect_success "GET DAG-JSON traversal returns 404" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo/bar?format=dag-json" > curl_output 2>&1 &&
test_should_contain "404 Not Found" curl_output
test_expect_success "GET DAG-JSON traverses multiple links" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_JSON_TRAVERSAL_CID/foo/link/bar?format=dag-json" > curl_output 2>&1 &&
jq --sort-keys . curl_output > actual &&
echo "{ \"hello\": \"this is not a link\" }" | jq --sort-keys . > expected &&
test_cmp expected actual
'

test_expect_success "GET DAG-CBOR traversal returns 404" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo/bar?format=dag-json" > curl_output 2>&1 &&
test_should_contain "404 Not Found" curl_output
test_expect_success "GET DAG-CBOR traversal returns 400 if there is path remainder" '
curl --head "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo?format=dag-cbor" > curl_output 2>&1 &&
test_should_contain "400 Bad Request" curl_output
'

test_expect_success "GET DAG-PB has expected output" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 &&
test_expect_success "GET DAG-CBOR traverses multiple links" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_TRAVERSAL_CID/foo/link/bar?format=dag-json" > curl_output 2>&1 &&
jq --sort-keys . curl_output > actual &&
test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual
echo "{ \"hello\": \"this is not a link\" }" | jq --sort-keys . > expected &&
test_cmp expected actual
'

# test_expect_success "GET DAG-PB has expected output" '
# curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 &&
# jq --sort-keys . curl_output > actual &&
# test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual
# '

test_kill_ipfs_daemon

test_done
test_done

Binary file modified test/sharness/t0123-gateway-json-cbor/dag-cbor-traversal.car
Binary file not shown.
Binary file modified test/sharness/t0123-gateway-json-cbor/dag-json-traversal.car
Binary file not shown.