Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmd/swarm/swarm-smoke: Trigger chunk debug on timeout #19101

Merged
merged 8 commits into from
Feb 18, 2019
25 changes: 16 additions & 9 deletions cmd/swarm/swarm-smoke/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,16 @@ var (
)

var (
allhosts string
hosts []string
filesize int
syncDelay int
httpPort int
wsPort int
verbosity int
timeout int
single bool
allhosts string
hosts []string
filesize int
syncDelay int
httpPort int
wsPort int
verbosity int
timeout int
single bool
trackTimeout int
)

func main() {
Expand Down Expand Up @@ -102,6 +103,12 @@ func main() {
Usage: "whether to fetch content from a single node or from all nodes",
Destination: &single,
},
cli.IntFlag{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why we need this to be configurable at all?

Name: "track-timeout",
Value: 5,
Usage: "timeout in seconds to wait for GetAllReferences to return",
Destination: &trackTimeout,
},
}

app.Flags = append(app.Flags, []cli.Flag{
Expand Down
71 changes: 70 additions & 1 deletion cmd/swarm/swarm-smoke/upload_and_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,19 @@ package main

import (
"bytes"
"context"
"fmt"
"io/ioutil"
"math/rand"
"os"
"sync"
"time"

"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/rpc"
"github.com/ethereum/go-ethereum/swarm/api"
"github.com/ethereum/go-ethereum/swarm/storage"
"github.com/ethereum/go-ethereum/swarm/testutil"
"github.com/pborman/uuid"

Expand All @@ -49,12 +55,75 @@ func uploadAndSyncCmd(ctx *cli.Context, tuid string) error {
case <-time.After(time.Duration(timeout) * time.Second):
metrics.GetOrRegisterCounter(fmt.Sprintf("%s.timeout", commandName), nil).Inc(1)

e := fmt.Errorf("timeout after %v sec", timeout)
// trigger debug functionality on randomBytes
err := trackChunks(randomBytes[:])
if err != nil {
e = fmt.Errorf("%v; triggerChunkDebug failed: %v", e, err)
}

return fmt.Errorf("timeout after %v sec", timeout)
return e
}
}

func trackChunks(testData []byte) error {
log.Warn("Test timed out; running chunk debug sequence")

addrs, err := getAllRefs(testData)
if err != nil {
return err
}
log.Trace("All references retrieved")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really need these many logs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vote yes. Smoke tests are very difficult to debug, as they are spanning nodes, it's not just one go process. There are also no unit tests.


// has-chunks
for _, host := range hosts {
httpHost := fmt.Sprintf("ws://%s:%d", host, 8546)
log.Trace("Calling `Has` on host", "httpHost", httpHost)
rpcClient, err := rpc.Dial(httpHost)
if err != nil {
log.Trace("Error dialing host", "err", err)
return err
}
log.Trace("rpc dial ok")
var hasInfo []api.HasInfo
err = rpcClient.Call(&hasInfo, "bzz_has", addrs)
if err != nil {
log.Trace("Error calling host", "err", err)
return err
}
log.Trace("rpc call ok")
count := 0
for _, info := range hasInfo {
if !info.Has {
count++
log.Error("Host does not have chunk", "host", httpHost, "chunk", info.Addr)
}
}
if count == 0 {
log.Info("Host reported to have all chunks", "host", httpHost)
}
}
return nil
}

func getAllRefs(testData []byte) (storage.AddressCollection, error) {
log.Trace("Getting all references for given root hash")
datadir, err := ioutil.TempDir("", "chunk-debug")
if err != nil {
return nil, fmt.Errorf("unable to create temp dir: %v", err)
}
defer os.RemoveAll(datadir)
fileStore, err := storage.NewLocalFileStore(datadir, make([]byte, 32))
if err != nil {
return nil, err
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(trackTimeout)*time.Second)
defer cancel()

reader := bytes.NewReader(testData)
return fileStore.GetAllReferences(ctx, reader, false)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please just fix the typo one line below? <3

func uplaodAndSync(c *cli.Context, randomBytes []byte, tuid string) error {
log.Info("uploading to "+httpEndpoint(hosts[0])+" and syncing", "tuid", tuid, "seed", seed)

Expand Down