Skip to content

Commit

Permalink
rfcs: draft on SQL savepoints
Browse files Browse the repository at this point in the history
Release note: None
  • Loading branch information
knz committed Feb 6, 2020
1 parent dbeb04d commit e405ff2
Show file tree
Hide file tree
Showing 19 changed files with 1,219 additions and 0 deletions.
949 changes: 949 additions & 0 deletions docs/RFCS/20191014_savepoints.md

Large diffs are not rendered by default.

Binary file added docs/RFCS/20191014_savepoints/distsender.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions docs/RFCS/20191014_savepoints/distsender.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
@startuml
' Sequence diagram: the path of a BatchRequest from client.Txn through
' TxnCoordSender and DistSender out to three leaseholders, and the path of
' the BatchResponse back to client.Txn.

' Participants drawn together on the node that uses the transaction.
box "On any node using txns"
participant "client.Txn" as Txn
participant TxnCoordSender
participant DistSender
end box
' Participants that receive the distributed batch.
box "Elsewhere"
participant leaseholder1
participant leaseholder2
participant leaseholder3
end box

' Request path (solid arrows): each layer forwards the batch to the next one,
' and DistSender fans it out to every leaseholder.
Txn -> TxnCoordSender : sender.Send(BatchRequest)
TxnCoordSender -> TxnCoordSender : (some txn logic)
TxnCoordSender -> DistSender : sender.Send(BatchRequest)
DistSender -> leaseholder1 : (distribute batch)
DistSender -> leaseholder2 : (distribute batch)
DistSender -> leaseholder3 : (distribute batch)
' Response path (dashed arrows): DistSender merges the per-leaseholder
' responses before answering TxnCoordSender.
leaseholder1 --> DistSender : BatchResponse
leaseholder2 --> DistSender : BatchResponse
leaseholder3 --> DistSender : BatchResponse
DistSender -> DistSender : (merge responses)
DistSender --> TxnCoordSender : BatchResponse
TxnCoordSender -> TxnCoordSender : (some txn logic)
TxnCoordSender --> Txn : BatchResponse
@enduml
Binary file added docs/RFCS/20191014_savepoints/erecovery.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions docs/RFCS/20191014_savepoints/erecovery.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@startuml
' Sequence diagram: a recoverable error reported by the interceptors makes
' TxnCoordSender create a new txn object with an incremented epoch and hand
' a retry error back to client.Txn, after which requests flow again.
participant "client.Txn" as Txn
participant TxnCoordSender
participant interceptors

' The first txn object is created at epoch 1; the activation bar starts here.
Txn -> TxnCoordSender : (first instantiation)
TxnCoordSender -> TxnCoordSender : init txn object (epoch = 1)
activate TxnCoordSender
...
Txn -> TxnCoordSender : Send(BatchRequest)
TxnCoordSender -> interceptors : Send(BatchRequest)
...
' The error (red arrow) ends the current activation bar; a new bar starts
' once the replacement txn object exists.
interceptors -[#red]-> TxnCoordSender : recoverable error!
deactivate TxnCoordSender
TxnCoordSender -> TxnCoordSender : new txn object (epoch++)
activate TxnCoordSender
TxnCoordSender -> Txn : retry error
...
' Subsequent requests take the same path as before the error.
Txn -> TxnCoordSender : Send(BatchRequest)
TxnCoordSender -> interceptors : Send(BatchRequest)
...
@enduml
Binary file added docs/RFCS/20191014_savepoints/eunrecoverable.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 16 additions & 0 deletions docs/RFCS/20191014_savepoints/eunrecoverable.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@startuml
' Sequence diagram: an unrecoverable error reported by the interceptors puts
' TxnCoordSender into txnState = txnError; the error is returned to
' client.Txn and a later Send is answered with an error as well.
participant "client.Txn" as Txn
participant TxnCoordSender
participant interceptors

Txn -> TxnCoordSender : Send(BatchRequest)
TxnCoordSender -> interceptors : Send(BatchRequest)
...
' Red arrow: the failure coming back from the interceptor stack.
interceptors -[#red]-> TxnCoordSender : unrecoverable error!
TxnCoordSender -> TxnCoordSender : "txnState = txnError"
' The red activation bar is never deactivated in this diagram.
activate TxnCoordSender #red
TxnCoordSender -> Txn : error
...
' A request sent after the error is rejected by TxnCoordSender; no arrow to
' the interceptors is drawn for it.
Txn -> TxnCoordSender : Send(BatchRequest)
TxnCoordSender -> Txn : error (txn trashed)
@enduml
Binary file added docs/RFCS/20191014_savepoints/interceptors.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
35 changes: 35 additions & 0 deletions docs/RFCS/20191014_savepoints/interceptors.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@startuml
' Sequence diagram: a batch (ba) sent by client.Txn passes through
' TxnCoordSender and a chain of interceptors (two of which are named:
' txnSeqNumAllocator and txnSpanRefresher) before reaching DistSender and the
' cluster; the BatchResponse then travels back through the same chain in
' reverse order.
participant "client.Txn" as Txn
participant TxnCoordSender
participant interceptor1
participant interceptor2
participant txnSeqNumAllocator
participant interceptor3
participant txnSpanRefresher
participant interceptor4
participant DistSender
' Declared explicitly (it was previously auto-created on first use) so that
' its position at the far right does not depend on message order.
participant cluster

' Request path (solid arrows).
Txn -> TxnCoordSender : Send(ba)
TxnCoordSender -> TxnCoordSender : (some txn logic)
TxnCoordSender -> interceptor1 : Send(ba)
interceptor1 -> interceptor2 : Send(ba)
interceptor2 -> txnSeqNumAllocator : Send(ba)
txnSeqNumAllocator -> txnSeqNumAllocator : assign seqnums in batch
txnSeqNumAllocator -> interceptor3 : Send(ba)
interceptor3 -> txnSpanRefresher : Send(ba)
txnSpanRefresher -> interceptor4 : Send(ba)
interceptor4 -> DistSender : Send(ba)
DistSender -> cluster : (distribute requests)
...
' Response path (dashed arrows). Merging is drawn as a DistSender self-step,
' as in distsender.puml, rather than as the label of the arrow coming from
' the cluster.
cluster --> DistSender : (responses)
DistSender -> DistSender : (merge responses)
DistSender --> interceptor4 : BatchResponse
interceptor4 --> txnSpanRefresher : BatchResponse
' Self-steps use a solid arrow, like the other self-steps in these diagrams.
txnSpanRefresher -> txnSpanRefresher : (collect spans)
txnSpanRefresher --> interceptor3 : BatchResponse
interceptor3 --> txnSeqNumAllocator : BatchResponse
txnSeqNumAllocator --> interceptor2 : BatchResponse
interceptor2 --> interceptor1 : BatchResponse
interceptor1 --> TxnCoordSender : BatchResponse
TxnCoordSender -> TxnCoordSender : (some txn logic)
TxnCoordSender --> Txn : BatchResponse
@enduml
Binary file added docs/RFCS/20191014_savepoints/leafbase.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
53 changes: 53 additions & 0 deletions docs/RFCS/20191014_savepoints/leafbase.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
@startuml
' Sequence diagram: SQL on the gateway node forks RootTxn into leaf metadata,
' ships it to a dSQL server on another node where a LeafTxn is created from
' it, and later merges the LeafTxn's metadata back into RootTxn.
skinparam BoxPadding 10

box "On the gateway node"
participant SQL
participant RootTxn
end box
box "On another node"
participant dSQLServer
participant LeafTxn
end box
participant cluster

create RootTxn
SQL -> RootTxn : client.NewTxn(RootTxn)
...
' Step 1: obtain TxnCoordMeta from RootTxn, strip it into leaf metadata and
' send it along with the flow setup.
note left of SQL
forking RootTxn
into LeafTxn meta
end note
SQL -> RootTxn : txn.GetTxnCoordMeta()
RootTxn --> SQL : TxnCoordMeta
SQL -> SQL : leafmeta.StripRootToLeaf()
SQL -> dSQLServer : SetupFlow(proc spec, leafmeta)
note over dSQLServer,LeafTxn: (dSQL proc starts exec)
' Step 2: the dSQL server creates the LeafTxn from the received metadata.
note left of SQL: actually instantiating LeafTxn
create LeafTxn
dSQLServer -> LeafTxn : client.NewTxnWithCoordMeta()
...
' Step 3: the dSQL server runs batches through LeafTxn, which talks to the
' cluster; results flow back to SQL.
note left of SQL
LeafTxn issuing reads
on behalf of RootTxn
end note
dSQLServer -> LeafTxn : txn.Run(client.Batch)
LeafTxn -> cluster : txn.sender.Send(roachpb.BatchRequest)
...
cluster --> LeafTxn : BatchResponse
LeafTxn --> dSQLServer : Batch modified in-place
dSQLServer --> SQL : some results
...
note over dSQLServer,LeafTxn: (dSQL proc finishes exec)
' Step 4: the LeafTxn's metadata is stripped and returned with the final
' results, then SQL augments RootTxn with it.
note left of SQL
updates to LeafTxn
repatriated into RootTxn
end note
dSQLServer -> LeafTxn : GetTxnCoordMeta()
LeafTxn --> dSQLServer : TxnCoordMeta
dSQLServer -> dSQLServer : leafmeta.StripLeafToRoot()
dSQLServer --> SQL : final results + leafmeta
SQL -> RootTxn : txn.AugmentTxnCoordMeta(leafmeta)
...
' Only RootTxn receives the final Commit/Rollback/CleanupOnError call.
SQL -> RootTxn : Commit/Rollback/CleanupOnError
@enduml
Binary file added docs/RFCS/20191014_savepoints/mismatch.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
31 changes: 31 additions & 0 deletions docs/RFCS/20191014_savepoints/mismatch.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
@startuml
' Sequence diagram: an epoch mismatch between RootTxn and LeafTxn. Both start
' at epoch 1; RootTxn moves to epoch 2 after a recoverable error; LeafTxn
' later fails and returns leafmeta that still carries epoch 1, which SQL then
' passes to RootTxn.
' Note: "cluster" is not declared below; PlantUML creates it on first use.
participant SQL
participant "RootTxn\n+TxnCoordSender" as RootTxn
participant "LeafTxn\n+TxnCoordSender" as LeafTxn

' RootTxn is created at epoch 1 and its metadata is used to create LeafTxn,
' which also starts at epoch 1.
create RootTxn
SQL -> RootTxn : NewTxn()
RootTxn -> RootTxn : init txn object (epoch = 1)
activate RootTxn
SQL -> RootTxn : GetMeta()
RootTxn --> SQL : leafmeta
create LeafTxn
SQL -> LeafTxn : NewTxn(leafmeta)
LeafTxn -> LeafTxn : init txn object (epoch = 1)
activate LeafTxn
...
' A recoverable error on RootTxn: its activation bar ends and a new one
' starts with the txn object at epoch 2.
SQL -> RootTxn : Send(BatchRequest)
RootTxn -> cluster : Send(BatchRequest)
cluster -[#red]-> RootTxn : recoverable error!
deactivate RootTxn
RootTxn -> RootTxn : new txn object (epoch = 2)
activate RootTxn
...
' A recoverable error on LeafTxn: it returns its metadata together with the
' error, and SQL augments RootTxn with it.
SQL -> LeafTxn : Send(BatchRequest)
LeafTxn -> cluster : Send(BatchRequest)
cluster -[#red]-> LeafTxn : recoverable error!
deactivate LeafTxn
LeafTxn --> SQL : leafmeta + error
SQL -> RootTxn : Augment(leafmeta)
' The problem being illustrated: the two epochs disagree.
RootTxn -> RootTxn : woops? (leafmeta.epoch = 1 vs txn epoch = 2)
@enduml
Binary file added docs/RFCS/20191014_savepoints/txnbase.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 21 additions & 0 deletions docs/RFCS/20191014_savepoints/txnbase.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
@startuml
' Sequence diagram: the basic lifecycle of a RootTxn on the gateway node.
' SQL creates it, runs batches through it against the cluster during query
' execution, and finally commits, rolls back or cleans it up.
box "On the gateway node"
participant SQL
participant RootTxn
end box
participant cluster

note over SQL: SQL transaction starts
create RootTxn
' Label fixed: the argument is RootTxn (it previously read "RootTxnn").
SQL -> RootTxn : client.NewTxn(RootTxn)
...
note over SQL: during query execution
' A client.Batch is run by the txn, which sends a roachpb.BatchRequest to the
' cluster; the response is reflected back into the same Batch.
SQL -> RootTxn : txn.Run(client.Batch)
RootTxn -> cluster : txn.sender.Send(roachpb.BatchRequest)
...
cluster --> RootTxn : BatchResponse
RootTxn --> SQL : Batch modified in-place
...
note over SQL: when SQL txn completes
SQL -> RootTxn : Commit/Rollback/CleanupOnError
@enduml
Binary file added docs/RFCS/20191014_savepoints/txncoordsender.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 37 additions & 0 deletions docs/RFCS/20191014_savepoints/txncoordsender.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
@startuml
' Sequence diagram: how a Txn (root or leaf) delegates to its TxnCoordSender.
' The Txn translates a client.Batch into a BatchRequest, TxnCoordSender puts
' the txn object into the batch and sends it to the cluster, then updates
' the txn object from the response.
box "On any node using txns"
participant "SQL executor\nor distsql server" as SQL
participant "RootTxn\nor LeafTxn" as Txn
participant TxnCoordSender
end box
participant cluster

note over SQL: SQL transaction starts
' Creating the Txn also creates its TxnCoordSender, which initializes the
' txn object.
create Txn
SQL -> Txn : client.NewTxn(Txn)
create TxnCoordSender
Txn -> TxnCoordSender : db.factory.TransactionalSender()
TxnCoordSender -> TxnCoordSender : initialize txn object
...
note over SQL: during query execution
' Request path: Batch -> BatchRequest -> cluster.
SQL -> Txn : txn.Run(client.Batch)
Txn -> Txn : translate Batch into BatchRequest
Txn -> TxnCoordSender: sender.Send(BatchRequest)
TxnCoordSender -> TxnCoordSender: populate txn object into batch
TxnCoordSender -> cluster : distsender.Send(BatchRequest)
...
' Response path: the response carries a txn object update that
' TxnCoordSender applies before returning.
cluster --> TxnCoordSender : BatchResponse w/ txn object update
TxnCoordSender -> TxnCoordSender: txn.Update(resp.Txn)
TxnCoordSender --> Txn : BatchResponse
Txn --> SQL : Batch modified in-place
...
note over SQL
when SQL txn completes
(only on the SQL gateway / RootTxn)
end note
' Completion: the Txn builds a BatchRequest containing an EndTxnRequest and
' sends it through TxnCoordSender.
SQL -> Txn : Commit/Rollback/CleanupOnError
Txn -> Txn : construct BatchRequest with EndTxnRequest
Txn -> TxnCoordSender : sender.Send(BatchRequest)
' NOTE(review): this outgoing message uses a dashed arrow, unlike the other
' requests in this diagram - presumably to convey "not always"; confirm.
TxnCoordSender --> cluster : clean up (not always)
TxnCoordSender -> TxnCoordSender : finalize txn
@enduml
Binary file added docs/RFCS/20191014_savepoints/zombie.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 29 additions & 0 deletions docs/RFCS/20191014_savepoints/zombie.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
@startuml
' Sequence diagram: RootTxn on the gateway node enters txnState = txnError
' after an unrecoverable error, while a LeafTxn created from its metadata on
' another node still sends a BatchRequest to the cluster afterwards.
' Note: "cluster" is not declared below; PlantUML creates it on first use.
skinparam BoxPadding 10

box "On gateway node"
participant SQL
participant "RootTxn\n+TxnCoordSender" as RootTxn
end box
box "On other node"
participant dSQLServer
participant "LeafTxn\n+TxnCoordSender" as LeafTxn
end box
' Setup: RootTxn is created, its metadata is fetched, and the dSQL server
' creates LeafTxn from that metadata after SetupFlow.
create RootTxn
SQL -> RootTxn : NewTxn()
SQL -> RootTxn : GetMeta()
RootTxn --> SQL : leafmeta
SQL -> dSQLServer : SetupFlow
create LeafTxn
dSQLServer -> LeafTxn : NewTxn(leafmeta)
...
' RootTxn hits an unrecoverable error (red arrow) and is marked with a red
' activation bar that is never deactivated in this diagram.
SQL -> RootTxn : Send(BatchRequest)
RootTxn -> cluster : Send(BatchRequest)
cluster -[#red]-> RootTxn : unrecoverable error!
RootTxn -> RootTxn : txnState = txnError
activate RootTxn #red
...
' The open question being illustrated: LeafTxn keeps issuing requests; no
' message informing it of RootTxn's error is drawn.
dSQLServer -> LeafTxn : Send(BatchRequest)
LeafTxn -> cluster : Send(BatchRequest)
cluster -> cluster : **wut? zombie txn?**
@enduml

0 comments on commit e405ff2

Please sign in to comment.