Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ludicrous Mode #4872

Merged
merged 26 commits into from
Mar 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
59dc4b9
Add ludicrous mode and use it for startTs
animesh2049 Feb 20, 2020
ca9e94e
Stop syncing to disc
animesh2049 Feb 20, 2020
26314a9
Don't wait for mutations to be applied
animesh2049 Feb 21, 2020
6182e71
Remove Transactions
animesh2049 Feb 24, 2020
0703f73
apply mutations in background
animesh2049 Feb 24, 2020
6e80851
added logic for batching requests
Feb 25, 2020
8c8fdfa
minor changes
Feb 25, 2020
29a3e54
temp
Feb 25, 2020
0ae09e0
Handle graceful shutdown in worker
animesh2049 Feb 26, 2020
65e63dd
merged some code
Feb 26, 2020
25113cc
Don't abort old transaction in lud mode
animesh2049 Feb 26, 2020
d5813ce
Merge branch 'animesh2049/ludicrous-mode' of github.com:dgraph-io/dgr…
animesh2049 Feb 26, 2020
c0c8e0e
Changed back some things
Feb 27, 2020
0474608
Merge branch 'animesh2049/ludicrous-mode' of github.com:dgraph-io/dgr…
Feb 27, 2020
82e3ebc
Skip group checksum
animesh2049 Feb 27, 2020
fe910b6
Temp
animesh2049 Feb 27, 2020
02fa91e
Merge branch 'animesh2049/ludicrous-mode' into animesh2049/lud-mode-2
animesh2049 Feb 27, 2020
e7cf9ed
Some cleanup
animesh2049 Feb 28, 2020
8e1ccd6
Address PR comments
animesh2049 Feb 28, 2020
a2bfc8c
Flush data in background when applying mutation.
Feb 28, 2020
169af3f
Address PR comments
animesh2049 Mar 4, 2020
f627bf7
Address PR comments
animesh2049 Mar 4, 2020
792b934
Simplify the timestamp allocation logic
manishrjain Mar 4, 2020
2b229f4
Address PR comments
animesh2049 Mar 5, 2020
99df3c6
Address PR comments
animesh2049 Mar 5, 2020
79528bc
minor fix
animesh2049 Mar 5, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dgraph/cmd/alpha/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ they form a Raft group and provide synchronous replication.
grpc.EnableTracing = false

flag.Bool("graphql_introspection", true, "Set to false for no GraphQL schema introspection")

flag.Bool("ludicrous_mode", false, "Run alpha in ludicrous mode")
}

func setupCustomTokenizers() {
Expand Down Expand Up @@ -578,6 +578,7 @@ func run() {
SnapshotAfter: Alpha.Conf.GetInt("snapshot_after"),
AbortOlderThan: abortDur,
StartTime: startTime,
LudicrousMode: Alpha.Conf.GetBool("ludicrous_mode"),
}

setupCustomTokenizers()
Expand Down
8 changes: 6 additions & 2 deletions dgraph/cmd/zero/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,10 @@ func (n *node) Run() {
go n.updateZeroMembershipPeriodically(closer)
go n.checkQuorum(closer)
go n.RunReadIndexLoop(closer, readStateCh)
if x.WorkerConfig.LudicrousMode {
closer.AddRunning(1)
go x.StoreSync(n.Store, closer)
}
// We only stop runReadIndexLoop after the for loop below has finished interacting with it.
// That way we know sending to readStateCh will not deadlock.

Expand Down Expand Up @@ -676,13 +680,13 @@ func (n *node) Run() {
}
n.SaveToStorage(&rd.HardState, rd.Entries, &rd.Snapshot)
timer.Record("disk")
if rd.MustSync {
span.Annotatef(nil, "Saved to storage")
if !x.WorkerConfig.LudicrousMode && rd.MustSync {
if err := n.Store.Sync(); err != nil {
glog.Errorf("Error while calling Store.Sync: %v", err)
}
timer.Record("sync")
}
span.Annotatef(nil, "Saved to storage")

if !raft.IsEmptySnap(rd.Snapshot) {
var state pb.MembershipState
Expand Down
5 changes: 5 additions & 0 deletions dgraph/cmd/zero/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ instances to achieve high-availability.
// about the status of supporting annotation logs through the datadog exporter
flag.String("datadog.collector", "", "Send opencensus traces to Datadog. As of now, the trace"+
" exporter does not support annotation logs and would discard them.")
flag.Bool("ludicrous_mode", false, "Run zero in ludicrous mode")
}

func setupListener(addr string, port int, kind string) (listener net.Listener, err error) {
Expand Down Expand Up @@ -170,6 +171,10 @@ func run() {
rebalanceInterval: Zero.Conf.GetDuration("rebalance_interval"),
}

x.WorkerConfig = x.WorkerOptions{
LudicrousMode: Zero.Conf.GetBool("ludicrous_mode"),
}

if opts.numReplicas < 0 || opts.numReplicas%2 == 0 {
log.Fatalf("ERROR: Number of replicas must be odd for consensus. Found: %d",
opts.numReplicas)
Expand Down
22 changes: 19 additions & 3 deletions edgraph/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,12 @@ func (s *Server) doMutate(ctx context.Context, qc *queryContext, resp *api.Respo
if len(qc.gmuList) == 0 {
return nil
}

if ctx.Err() != nil {
return ctx.Err()
}
if x.WorkerConfig.LudicrousMode {
qc.req.StartTs = worker.State.GetTimestamp(false)
}

start := time.Now()
defer func() {
Expand Down Expand Up @@ -323,6 +325,15 @@ func (s *Server) doMutate(ctx context.Context, qc *queryContext, resp *api.Respo
qc.span.Annotatef(nil, "Applying mutations: %+v", m)
resp.Txn, err = query.ApplyMutations(ctx, m)
qc.span.Annotatef(nil, "Txn Context: %+v. Err=%v", resp.Txn, err)

if x.WorkerConfig.LudicrousMode {
// Mutations are automatically committed in case of ludicrous mode, so we don't
// need to manually commit.
resp.Txn.Keys = resp.Txn.Keys[:0]
resp.Txn.CommitTs = qc.req.StartTs
return err
}

if !qc.req.CommitNow {
if err == zero.ErrConflict {
err = status.Error(codes.FailedPrecondition, err.Error())
Expand Down Expand Up @@ -794,7 +805,7 @@ func (s *Server) doQuery(ctx context.Context, req *api.Request, doAuth AuthMode)
// assigned in the processQuery function called below.
defer annotateStartTs(qc.span, qc.req.StartTs)
// For mutations, we update the startTs if necessary.
if isMutation && req.StartTs == 0 {
if isMutation && req.StartTs == 0 && !x.WorkerConfig.LudicrousMode {
start := time.Now()
req.StartTs = worker.State.GetTimestamp(false)
qc.latency.AssignTimestamp = time.Since(start)
Expand Down Expand Up @@ -825,7 +836,12 @@ func processQuery(ctx context.Context, qc *queryContext) (*api.Response, error)
if len(qc.req.Query) == 0 {
return resp, nil
}

if ctx.Err() != nil {
return resp, ctx.Err()
}
if x.WorkerConfig.LudicrousMode {
qc.req.StartTs = posting.Oracle().MaxAssigned()
}
qr := query.Request{
Latency: qc.latency,
GqlQuery: &qc.gqlRes,
Expand Down
4 changes: 4 additions & 0 deletions posting/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ func (l *List) addMutationInternal(ctx context.Context, txn *Txn, t *pb.Directed
}
l.updateMutationLayer(mpost)

if x.WorkerConfig.LudicrousMode {
return nil
}

// We ensure that commit marks are applied to posting lists in the right
// order. We can do so by proposing them in the same order as received by the Oracle delta
// stream from Zero, instead of in goroutines.
Expand Down
4 changes: 4 additions & 0 deletions posting/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ func (w *TxnWriter) Flush() error {
glog.Errorf("Error while calling Sync from TxnWriter.Flush: %v", err)
}
}()
return w.Wait()
}

func (w *TxnWriter) Wait() error {
w.wg.Wait()
select {
case err := <-w.che:
Expand Down
45 changes: 36 additions & 9 deletions worker/draft.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,14 @@ func (n *node) applyCommitted(proposal *pb.Proposal) error {
span.Annotatef(nil, "While applying mutations: %v", err)
return err
}
if x.WorkerConfig.LudicrousMode {
ts := proposal.Mutations.StartTs
return n.commitOrAbort(proposal.Key, &pb.OracleDelta{
Txns: []*pb.TxnStatus{
{StartTs: ts, CommitTs: ts},
},
})
}
span.Annotate(nil, "Done")
return nil
}
Expand Down Expand Up @@ -554,12 +562,20 @@ func (n *node) commitOrAbort(pkey string, delta *pb.OracleDelta) error {
for _, status := range delta.Txns {
toDisk(status.StartTs, status.CommitTs)
}
if err := writer.Flush(); err != nil {
return errors.Wrapf(err, "while flushing to disk")
if x.WorkerConfig.LudicrousMode {
if err := writer.Wait(); err != nil {
glog.Errorf("Error while waiting to commit: +%v", err)
}
} else {
if err := writer.Flush(); err != nil {
return errors.Wrapf(err, "while flushing to disk")
}
}

g := groups()
atomic.StoreUint64(&g.deltaChecksum, delta.GroupChecksums[g.groupId()])
if delta.GroupChecksums != nil && delta.GroupChecksums[g.groupId()] > 0 {
atomic.StoreUint64(&g.deltaChecksum, delta.GroupChecksums[g.groupId()])
}

// Now advance Oracle(), so we can service waiting reads.
posting.Oracle().ProcessDelta(delta)
Expand Down Expand Up @@ -790,6 +806,13 @@ func (n *node) Run() {
go n.checkpointAndClose(done)
go n.ReportRaftComms()

if x.WorkerConfig.LudicrousMode {
closer := y.NewCloser(2)
defer closer.SignalAndWait()
go x.StoreSync(n.Store, closer)
go x.StoreSync(pstore, closer)
}

applied, err := n.Store.Checkpoint()
if err != nil {
glog.Errorf("While trying to find raft progress: %v", err)
Expand Down Expand Up @@ -907,18 +930,18 @@ func (n *node) Run() {
// Store the hardstate and entries. Note that these are not CommittedEntries.
n.SaveToStorage(&rd.HardState, rd.Entries, &rd.Snapshot)
timer.Record("disk")
if rd.MustSync {
if err := n.Store.Sync(); err != nil {
glog.Errorf("Error while calling Store.Sync: %+v", err)
}
timer.Record("sync")
}
if span != nil {
span.Annotatef(nil, "Saved %d entries. Snapshot, HardState empty? (%v, %v)",
len(rd.Entries),
raft.IsEmptySnap(rd.Snapshot),
raft.IsEmptyHardState(rd.HardState))
}
if !x.WorkerConfig.LudicrousMode && rd.MustSync {
if err := n.Store.Sync(); err != nil {
glog.Errorf("Error while calling Store.Sync: %+v", err)
}
timer.Record("sync")
}

// Now schedule or apply committed entries.
var proposals []*pb.Proposal
Expand Down Expand Up @@ -953,6 +976,10 @@ func (n *node) Run() {
if span := otrace.FromContext(pctx.Ctx); span != nil {
span.Annotate(nil, "Proposal found in CommittedEntries")
}
if x.WorkerConfig.LudicrousMode {
// Assuming that there will be no error while proposing.
n.Proposals.Done(proposal.Key, nil)
}
}
proposal.Index = entry.Index
proposals = append(proposals, proposal)
Expand Down
2 changes: 2 additions & 0 deletions x/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ type WorkerOptions struct {
ProposedGroupId uint32
// StartTime is the start time of the alpha
StartTime time.Time
// LudicrousMode is super fast mode with fewer guarantees.
LudicrousMode bool
}

// WorkerConfig stores the global instance of the worker package's options.
Expand Down
19 changes: 19 additions & 0 deletions x/x.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,3 +889,22 @@ func RunVlogGC(store *badger.DB, closer *y.Closer) {
}
}
}

// DB is the minimal interface StoreSync needs from a store: a single Sync
// method that reports failure through its error return.
type DB interface {
// Sync is invoked periodically by StoreSync.
// NOTE(review): presumably flushes pending writes to durable storage — confirm
// against the concrete store implementations.
Sync() error
}

// StoreSync calls db.Sync once per second until closer is signalled, then
// returns. A failed Sync is logged and does not stop the loop. The function
// blocks for its whole lifetime and calls closer.Done on exit, so the caller
// must have accounted for it in closer before starting it.
func StoreSync(db DB, closer *y.Closer) {
	defer closer.Done()

	ticker := time.NewTicker(1 * time.Second)
	// Stop the ticker on exit so it does not keep firing after we return.
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			if err := db.Sync(); err != nil {
				glog.Errorf("Error while calling db sync: %+v", err)
			}
		case <-closer.HasBeenClosed():
			return
		}
	}
}