-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
api.proto
1795 lines (1584 loc) · 76.3 KB
/
api.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
syntax = "proto3";
package cockroach.roachpb;
option go_package = "roachpb";
import "roachpb/data.proto";
import "roachpb/errors.proto";
import "roachpb/metadata.proto";
import "storage/engine/enginepb/mvcc.proto";
import "storage/engine/enginepb/mvcc3.proto";
import "util/hlc/timestamp.proto";
import "util/tracing/recorded_span.proto";
import "gogoproto/gogo.proto";
// ReadConsistencyType specifies what type of consistency is observed
// during read operations.
enum ReadConsistencyType {
  option (gogoproto.goproto_enum_prefix) = false;

  // CONSISTENT: the read is guaranteed to observe committed data. The
  // guarantee depends on clocks for deciding when leases expire.
  CONSISTENT = 0;

  // READ_UNCOMMITTED: the read returns uncommitted data alongside committed
  // data, so dirty reads are possible (as with INCONSISTENT). Unlike
  // INCONSISTENT, and like CONSISTENT, the replica serving the read must
  // hold a valid read lease, which rules out arbitrarily stale results.
  READ_UNCOMMITTED = 1;

  // INCONSISTENT: the read returns the latest committed values that are
  // available. This is more efficient, but because pending intents are
  // ignored the values read may be stale.
  INCONSISTENT = 2;
}
// RangeInfo describes a range which executed a request. It contains
// the range descriptor and lease information at the time of execution.
message RangeInfo {
  // Descriptor of the range as of the time the request executed.
  RangeDescriptor desc = 1 [
    (gogoproto.nullable) = false
  ];

  // Lease information of the range as of the time the request executed.
  Lease lease = 2 [
    (gogoproto.nullable) = false
  ];
}
// RequestHeader is supplied with every storage node request.
message RequestHeader {
  option (gogoproto.equal) = true;

  reserved 1, 2;

  // Key the request addresses. When the request operates on a range of keys,
  // this is the start key of that range.
  bytes key = 3 [
    (gogoproto.casttype) = "Key"
  ];

  // Left empty when the request spans a single key only. Otherwise it must
  // order strictly after key, and the operation applies to the key range
  // [key, end_key): key is included, end_key is excluded.
  bytes end_key = 4 [
    (gogoproto.casttype) = "Key"
  ];

  // Transactional sequence number, indexed from zero.
  int32 sequence = 5 [
    (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb.TxnSeq"
  ];
}
// ResponseHeader is returned with every storage node response.
message ResponseHeader {
  // ResumeReason states why a spanning operation did not run to completion.
  enum ResumeReason {
    option (gogoproto.goproto_enum_prefix) = false;

    // The zero value: there is nothing to resume.
    RESUME_UNKNOWN = 0;

    // The key limit was exceeded before the spanning operation finished.
    RESUME_KEY_LIMIT = 1;

    // The command was configured to stop at range boundaries and the
    // spanning operation encountered one before finishing.
    RESUME_RANGE_BOUNDARY = 2;
  }

  // Non-nil if the request specified a non-nil transaction. Depending on the
  // outcome of the request, the transaction's timestamp and/or priority may
  // have been updated.
  Transaction txn = 3;

  // Span to resume from when the response does not cover the full span that
  // was requested. That happens either when the batch header bounds the
  // number of keys via max_span_request_keys, or when scan_options stopped a
  // scan before it covered the requested data.
  //
  // Unset when the entire span of keys has been operated on. Equal to the
  // original span when the request was ignored because another request in
  // the batch already hit max_span_request_keys. For a reverse scan it is the
  // end_key that gets updated.
  Span resume_span = 4;

  // Why the operation was not completed and has to be resumed; meaningful
  // when resume_span is populated.
  // The field first appeared in v2.0. Storage responses from older servers
  // do not carry it, but the conversion from a BatchResponse to a
  // client.Result always fills it in.
  ResumeReason resume_reason = 7;

  // Number of keys the request operated on.
  int64 num_keys = 5;

  // The range, or list of ranges, used to execute the request. Scan,
  // ReverseScan and DeleteRange may return more than one range.
  repeated RangeInfo range_infos = 6 [
    (gogoproto.nullable) = false
  ];
}
// A GetRequest is the argument for the Get() method.
message GetRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A GetResponse is the return value from the Get() method.
// If the key doesn't exist, Value will be nil.
message GetResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The value that was read; nil when the key does not exist.
  Value value = 2;

  // When the READ_UNCOMMITTED consistency level is used: the intent that was
  // seen, if there was one.
  Value intent_value = 3;
}
// A PutRequest is the argument to the Put() method.
message PutRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The value to put.
  Value value = 2 [
    (gogoproto.nullable) = false
  ];

  // When true, the value is put without a corresponding timestamp. Use with
  // care: a value written this way cannot be used with transactions.
  bool inline = 3;

  // NOTE: internal use only! Marks the put as writing to virgin keyspace, so
  // that no reads are needed to rationalize MVCC.
  bool blind = 4;
}
// A PutResponse is the return value from the Put() method.
message PutResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A ConditionalPutRequest is the argument to the ConditionalPut() method.
//
// - Returns true and sets value if exp_value equals existing value.
// - If key doesn't exist and exp_value is nil, sets value.
// - If key exists, but value is empty and exp_value is not nil but empty, sets value.
// - Otherwise, returns an error containing the actual value of the key.
message ConditionalPutRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The value to put.
  Value value = 2 [
    (gogoproto.nullable) = false
  ];

  // The expected existing value. Set exp_value.bytes empty to test for
  // non-existence. Specify as nil to indicate that there should be no
  // existing entry; this is not the same as expecting that the value exists
  // but is empty.
  Value exp_value = 3;

  // NOTE: internal use only! Marks the put as writing to virgin keyspace, so
  // that no reads are needed to rationalize MVCC.
  bool blind = 4;

  // Normally, when a specific non-empty expected value is supplied, the key
  // *must* exist with that value. Setting this flag makes it equally
  // acceptable for the key not to exist, which helps when a given value is
  // expected but may not have been written yet.
  bool allow_if_does_not_exist = 5;
}
// A ConditionalPutResponse is the return value from the
// ConditionalPut() method.
message ConditionalPutResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An InitPutRequest is the argument to the InitPut() method.
//
// - If key doesn't exist, sets value.
// - If key exists, returns a ConditionFailedError if value != existing value
// If failOnTombstones is set to true, tombstone values count as mismatched
// values and will cause a ConditionFailedError.
message InitPutRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The value to set if the key does not exist, and to compare against the
  // existing value if it does.
  Value value = 2 [
    (gogoproto.nullable) = false
  ];

  // NOTE: internal use only! Marks the put as writing to virgin keyspace, so
  // that no reads are needed to rationalize MVCC.
  bool blind = 3;

  // When true, a tombstone counts as a mismatch and results in a
  // ConditionFailedError.
  bool failOnTombstones = 4;
}
// An InitPutResponse is the return value from the InitPut() method.
message InitPutResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An IncrementRequest is the argument to the Increment() method. It
// increments the value for key, and returns the new value. If no
// value exists for a key, incrementing by 0 is not a noop, but will
// create a zero value. IncrementRequest cannot be called on a key set
// by Put() or ConditionalPut(). Similarly, Put() and ConditionalPut()
// cannot be invoked on an incremented key.
message IncrementRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The increment to apply to the value for the key.
  int64 increment = 2;
}
// An IncrementResponse is the return value from the Increment
// method. The new value after increment is specified in NewValue. If
// the value could not be decoded as specified, Error will be set.
message IncrementResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The value after the increment was applied.
  int64 new_value = 2;
}
// A DeleteRequest is the argument to the Delete() method.
message DeleteRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A DeleteResponse is the return value from the Delete() method.
message DeleteResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A DeleteRangeRequest is the argument to the DeleteRange() method. It
// specifies the range of keys to delete.
message DeleteRangeRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  reserved 2;

  // When true, the response lists the keys that were deleted.
  bool return_keys = 3;

  // When true, delete "inline" keys, i.e. keys stored without MVCC
  // timestamps. An "inline" DeleteRange fails if it attempts to delete any
  // key that contains a timestamped (non-inline) value, so only use this
  // option on keys known to store inline values, such as data in cockroach's
  // time series system.
  //
  // The reverse holds too: attempts to delete keys with inline values fail
  // unless this flag is true. The setting must match the data being deleted.
  //
  // Inline values cannot be deleted transactionally: a DeleteRange with
  // "inline" set to true fails if it is executed within a transaction.
  bool inline = 4;
}
// A DeleteRangeResponse is the return value from the DeleteRange()
// method.
message DeleteRangeResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Every deleted key, provided return_keys was set on the request.
  repeated bytes keys = 2 [
    (gogoproto.casttype) = "Key"
  ];
}
// A ClearRangeRequest is the argument to the ClearRange() method. It
// specifies a range of keys to clear from the underlying engine. Note
// that this differs from the behavior of DeleteRange, which sets
// transactional intents and writes tombstones to the deleted
// keys. ClearRange is used when permanently dropping or truncating
// table data.
//
// ClearRange also updates the GC threshold for the range to the
// timestamp at which this command executes, to prevent reads at
// earlier timestamps from incorrectly returning empty results.
//
// NOTE: it is important that this method only be invoked on a key
// range which is guaranteed to be both inactive and not see future
// writes. Ignoring this warning may result in data loss.
message ClearRangeRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A ClearRangeResponse is the return value from the ClearRange() method.
message ClearRangeResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// ScanOptions is a collection of options for a batch of scans. The options
// apply to all the scans in the batch.
//
// If ScanOptions is present on a batch, the batch can only be made up of scan
// requests (i.e. {Reverse,}ScanRequest), except for
// {Begin,End}TransactionRequest which are still allowed. Moreover, the batch
// cannot mix forward and reverse scans.
//
// TODO(andrei): add option to stop scan(s) when an intent is encountered.
message ScanOptions {
  option (gogoproto.equal) = true;

  // stop_at_range_boundary, if set, means that the scans will stop at the
  // first range boundary after min_results is satisfied (if min_results is
  // specified). If the end of a range stops the scans, the resume_span
  // returned (for all the scans whose spans weren't fully scanned)
  // represents the beginning of the next range (as ranges were at the time
  // of the read).
  //
  // This flag can be combined with header.max_span_request_keys, but that
  // means that the desired number of keys may not be scanned.
  //
  // stop_at_range_boundary implies that DistSender will no longer
  // parallelize the execution of requests between ranges; instead, either
  // we're only speaking about scanning one range (if min_results is not set)
  // or it executes the scan serially over different ranges. Note that this
  // is also the case when max_span_request_keys is set (as the scan needs to
  // return results in order), and SQL always sets max_span_request_keys.
  bool stop_at_range_boundary = 1;

  // min_results, if != 0, prevents the stop_at_range_boundary option from
  // terminating the scans before this many keys have been touched. The
  // counting is done across all the scans in the batch.
  //
  // A common value is 1, used if either the client knows that one key will
  // be sufficient for its purposes or if the client wants to skip a prefix
  // of empty ranges and seek to the point where some data is present.
  //
  // This can only be set if stop_at_range_boundary is set. If
  // header.max_span_request_keys is set, min_results needs to be <=
  // header.max_span_request_keys.
  int64 min_results = 2;
}
// ScanFormat is an enumeration of the available response formats for MVCCScan
// operations.
enum ScanFormat {
  option (gogoproto.goproto_enum_prefix) = false;

  // Standard MVCCScan format: the result is a slice of KeyValue messages.
  KEY_VALUES = 0;

  // batch_response format: the result is a byte slice in which keys and
  // values alternate, each one prefixed by its length as a varint.
  BATCH_RESPONSE = 1;
}
// A ScanRequest is the argument to the Scan() method. It specifies the
// start and end keys for an ascending scan of [start,end) and the maximum
// number of results (unbounded if zero).
message ScanRequest {
  option (gogoproto.equal) = true;

  reserved 2, 3;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Format wanted for the response. With BATCH_RESPONSE the server fills in
  // the batch_responses field of the ScanResponse rather than the rows
  // field.
  ScanFormat scan_format = 4;
}
// A ScanResponse is the return value from the Scan() method.
message ScanResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The scanned rows; empty if no rows were scanned.
  repeated KeyValue rows = 2 [
    (gogoproto.nullable) = false
  ];

  // Intent rows seen by a scan performed at the READ_UNCOMMITTED consistency
  // level. They are not counted against the MaxSpanRequestKeys count.
  repeated KeyValue intent_rows = 3 [
    (gogoproto.nullable) = false
  ];

  // If set, every item of this repeated bytes field holds part of the
  // results in batch format: a buffer of varint-prefixed slices that
  // alternate between key and value. Each entry is complete, i.e. no
  // key/value pair is split across more than one entry. Across all entries
  // there are num_keys pairs in total, as defined by the ResponseHeader.
  // If this field is set, rows is not set, and vice versa.
  repeated bytes batch_responses = 4;
}
// A ReverseScanRequest is the argument to the ReverseScan() method. It specifies the
// start and end keys for a descending scan of [start,end) and the maximum
// number of results (unbounded if zero).
message ReverseScanRequest {
  option (gogoproto.equal) = true;

  reserved 2, 3;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The desired format for the response. If set to BATCH_RESPONSE, the
  // server will set the batch_responses field in the ReverseScanResponse
  // instead of the rows field.
  ScanFormat scan_format = 4;
}
// A ReverseScanResponse is the return value from the ReverseScan() method.
message ReverseScanResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The scanned rows; empty if no rows were scanned.
  repeated KeyValue rows = 2 [
    (gogoproto.nullable) = false
  ];

  // Intent rows seen by a scan performed at the READ_UNCOMMITTED consistency
  // level. They are not counted against the MaxSpanRequestKeys count.
  repeated KeyValue intent_rows = 3 [
    (gogoproto.nullable) = false
  ];

  // If set, every item of this repeated bytes field holds part of the
  // results in batch format: a buffer of varint-prefixed slices that
  // alternate between key and value. Each entry is complete, i.e. no
  // key/value pair is split across more than one entry. Across all entries
  // there are num_keys pairs in total, as defined by the ResponseHeader.
  // If this field is set, rows is not set, and vice versa.
  repeated bytes batch_responses = 4;
}
// ChecksumMode is the mode in which a consistency check is carried out (see
// CheckConsistencyRequest.mode).
enum ChecksumMode {
  // CHECK_VIA_QUEUE is the mode for requests made from the consistency
  // queue. A full check is carried out and, depending on its result, a
  // recursive consistency check is triggered:
  //
  // 1. no inconsistency found: if the recomputed stats don't match the
  //    persisted stats, trigger a RecomputeStatsRequest.
  // 2. inconsistency found: if a diff is available, print it and trigger a
  //    fatal error. If no diff was found, trigger a recursive check with the
  //    diff requested (which then triggers the fatal error).
  //
  // TODO(tbg): these semantics are an artifact of how consistency checks
  // were first implemented. The extra behavior here should move to the
  // consistency check queue instead and this option dropped from the enum.
  CHECK_VIA_QUEUE = 0;

  // CHECK_FULL recomputes the hash of the replicated data in all replicas
  // and uses it to determine whether there is an inconsistency.
  CHECK_FULL = 1;

  // CHECK_STATS hashes only the persisted lease applied state (which notably
  // includes the persisted MVCCStats). This catches a large class of replica
  // inconsistencies observed in the wild (where replicas apply a
  // nonidentical log of commands, and as a result almost always have
  // divergent stats), while the work done is independent of the size of the
  // data contained in the replicas.
  CHECK_STATS = 2;
}
// A CheckConsistencyRequest is the argument to the CheckConsistency() method.
// It specifies the start and end keys for a span of ranges to which a
// consistency check should be applied. A consistency check on a range involves
// running a ComputeChecksum on the range followed by a storage.CollectChecksum.
message CheckConsistencyRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Log a diff of inconsistencies if such inconsistencies are found. This is
  // only valid if mode == CHECK_VIA_QUEUE.
  bool with_diff = 2;

  // The mode in which the consistency check is carried out; see
  // ChecksumMode.
  ChecksumMode mode = 3;

  // Whether to create a RocksDB checkpoint on each replica at the log
  // position at which the SHA is computed. The checkpoint is essentially a
  // cheap point-in-time backup of the database. It will be put into the
  // engines' auxiliary directory and needs to be removed manually to avoid
  // leaking disk space.
  bool checkpoint = 4;
}
// A CheckConsistencyResponse is the return value from the CheckConsistency() method.
// It returns the status the range was found in.
message CheckConsistencyResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Status is the state a range was found in by the consistency check.
  enum Status {
    // Not every replica returned a checksum, but no inconsistency was
    // detected.
    RANGE_INDETERMINATE = 0;

    // The check detected a definite inconsistency.
    RANGE_INCONSISTENT = 1;

    // Every replica of the range agreed on the checksum.
    RANGE_CONSISTENT = 2;

    // Same as RANGE_CONSISTENT, except that the recomputed stats disagreed
    // with the persisted stats. Since the persisted stats indicate
    // estimates, this is expected.
    RANGE_CONSISTENT_STATS_ESTIMATED = 3;

    // Same as RANGE_CONSISTENT_STATS_ESTIMATED, except that the persisted
    // stats involved in the mismatch claimed to be accurate. This is
    // unexpected and likely indicates a bug in our logic to incrementally
    // update the stats as commands are evaluated and applied.
    RANGE_CONSISTENT_STATS_INCORRECT = 4;
  }

  // Result is the outcome of the consistency check for a single range.
  message Result {
    // ID of the range that was checked.
    int64 range_id = 1 [
      (gogoproto.customname) = "RangeID",
      (gogoproto.casttype) = "RangeID"
    ];

    // Start key of the range identified by range_id, as of the time of the
    // check. Because requests can only be routed by key, this makes it
    // possible to later send additional requests to only a subset of the
    // ranges contained within a result.
    bytes start_key = 2;

    // The status the range was found in.
    Status status = 3;

    // Information related to the operation. When no inconsistency is found,
    // this holds informational value such as observed stats. When an
    // inconsistency is found, this holds information about it, including the
    // involved replica and, if requested, the diff.
    string detail = 4;
  }

  // One Result for each Range checked, in no particular order.
  repeated Result result = 2 [
    (gogoproto.nullable) = false
  ];
}
// A RecomputeStatsRequest triggers a stats recomputation on the Range addressed by
// the request.
//
// An error will be returned if the start key does not match the start key of the
// target Range.
//
// The stats recomputation touches essentially the whole range, but the command
// avoids having to block other commands by taking care to not interleave
// with splits, and by using the commutativity of stats updates. As a result,
// it is safe to invoke at any time, including repeatedly, though it should be
// used conservatively due to performing a full scan of the Range.
message RecomputeStatsRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // If true, the stats delta is computed but no stats adjustment is
  // performed. Because RecomputeStats is safe and idempotent, this isn't
  // useful outside of testing.
  bool dry_run = 2;
}
// A RecomputeStatsResponse is the response to a RecomputeStatsRequest.
message RecomputeStatsResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The adjustment that was made to the range's stats, i.e.
  // `new_stats = old_stats + added_delta`.
  storage.engine.enginepb.MVCCStatsDelta added_delta = 2 [
    (gogoproto.nullable) = false
  ];
}
// A BeginTransactionRequest is the argument to the BeginTransaction() method.
message BeginTransactionRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A BeginTransactionResponse is the return value from the BeginTransaction() method.
message BeginTransactionResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An EndTransactionRequest is the argument to the EndTransaction() method. It
// specifies whether to commit or roll back an extant transaction.
message EndTransactionRequest {
  option (gogoproto.equal) = true;

  reserved 7;

  // Common request header (embedded, non-nullable).
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // True to commit; false to abort and roll back.
  bool commit = 2;

  // If set, the maximum (exclusive) timestamp at which the transaction can
  // commit, i.e. the maximum timestamp for the txn's reads and writes.
  // When EndTransaction(Commit=true) finds that the txn's timestamp has been
  // pushed above this deadline, an error is returned and the client is
  // supposed to roll back the txn.
  util.hlc.Timestamp deadline = 3;

  // Commit triggers. These are for internal use only and will cause an error
  // if requested through the external-facing KV API.
  InternalCommitTrigger internal_commit_trigger = 4;

  // Intents written by the transaction, as a list of spans: write intents
  // have been written in these spans and must be resolved on txn completion.
  // If the keys written by the transaction exceeded a size threshold, the
  // spans may have been condensed to cover aggregate spans.
  repeated Span intent_spans = 5 [
    (gogoproto.nullable) = false
  ];

  // Requires the transaction to complete as a 1 phase commit, which
  // guarantees that all writes are to the same range and that no intents are
  // left in the event of an error.
  bool require_1pc = 6 [
    (gogoproto.customname) = "Require1PC"
  ];

  // True if this transaction is serializable isolation but has accumulated
  // no refresh spans. The executing server can then retry it locally on the
  // fast path.
  bool no_refresh_spans = 8;

  // True to have the intent spans resolved with poison=true. Used when the
  // transaction is being aborted independently of the main thread of client
  // operation, as in the case of an asynchronous abort from the
  // TxnCoordSender on a failed heartbeat.
  bool poison = 9;
}
// An EndTransactionResponse is the return value from the
// EndTransaction() method. The final transaction record is returned
// as part of the response header. In particular, transaction status
// and timestamp will be updated to reflect final committed
// values. Clients may propagate the transaction timestamp as the
// final txn commit timestamp in order to preserve causal ordering
// between subsequent transactions.
message EndTransactionResponse {
  reserved 2, 3;

  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // True if the transaction committed on the one phase commit path, meaning
  // that all of the transaction's writes were written as a single, atomic
  // write batch to just one range.
  bool one_phase_commit = 4;
}
// An AdminSplitRequest is the argument to the AdminSplit() method. The
// existing range which contains header.key is split by
// split_key. If split_key is not specified, then this method will
// determine a split key that is roughly halfway through the
// range. The existing range is resized to cover only its start key to
// the split key. The new range created by the split starts at the
// split key and extends to the original range's end key. If split_key
// is known, header.key should also be set to split_key.
//
// New range IDs for each of the split range's replica and a new Raft
// ID are generated by the operation. Split requests are done in the
// context of a distributed transaction which updates range addressing
// records, range metadata and finally, provides a commit trigger to
// update bookkeeping and instantiate the new range on commit.
//
// The new range contains range replicas located on the same stores;
// no range data is moved during this operation. The split can be
// thought of as a mostly logical operation, though some other
// metadata (e.g. abort span and range stats) must be copied or
// recomputed.
message AdminSplitRequest {
  option (gogoproto.equal) = true;

  // Common request header (embedded, non-nullable). header.key selects the
  // range to split.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Key at which the range is split. If not specified, a split key roughly
  // halfway through the range is determined.
  bytes split_key = 2 [
    (gogoproto.casttype) = "Key"
  ];
}
// An AdminSplitResponse is the return value from the AdminSplit()
// method.
message AdminSplitResponse {
  // Common response header (embedded, non-nullable).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An AdminMergeRequest is the argument to the AdminMerge() method. A
// merge is performed by calling AdminMerge on the left-hand range of
// two consecutive ranges (i.e. the range which contains keys which
// sort first). This range will be the subsuming range and the right
// hand range will be subsumed. After the merge operation, the
// subsumed range will no longer exist and the subsuming range will
// now encompass all keys from its original start key to the end key
// of the subsumed range. If AdminMerge is called on the final range
// in the key space, it is a noop.
message AdminMergeRequest {
  option (gogoproto.equal) = true;

  // Request header. The request is directed at the left-hand
  // (subsuming) range of the two consecutive ranges being merged.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An AdminMergeResponse is the return value from the AdminMerge()
// method.
message AdminMergeResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An AdminTransferLeaseRequest is the argument to the AdminTransferLease()
// method. A lease transfer allows an external entity to control the lease
// holder for a range. The target of the lease transfer needs to be a valid
// replica of the range.
message AdminTransferLeaseRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Target of the lease transfer, cast to StoreID in generated code. The
  // target needs to be a valid replica of the range.
  int32 target = 2 [(gogoproto.casttype) = "StoreID"];
}
// An AdminTransferLeaseResponse is the return value from the
// AdminTransferLease() method.
message AdminTransferLeaseResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// An AdminChangeReplicasRequest is the argument to the AdminChangeReplicas()
// method. A change replicas operation allows adding or removing a set of
// replicas for a range.
message AdminChangeReplicasRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The kind of replica change to perform on the targets (this request
  // allows adding or removing a set of replicas; see ReplicaChangeType).
  ReplicaChangeType change_type = 2;

  // The replication targets the change applies to. Changes are applied in
  // the order they appear here.
  repeated ReplicationTarget targets = 3 [(gogoproto.nullable) = false];

  // ExpDesc is the expected current range descriptor to modify. If ExpDesc
  // is not nil and the value of the range descriptor is not identical to
  // ExpDesc, the request will fail.
  //
  // When targets specifies more than one change, the expectation applies to
  // the first change only; each subsequent change uses the descriptor that
  // resulted from successfully applying the previous one. If a change with
  // more than one target races with another change, an error may occur
  // after the change has been partially applied.
  //
  // This field was added for 19.1 release and must remain optional until
  // 19.2.
  // TODO(ajwerner): Make this non-nullable for 19.2.
  RangeDescriptor exp_desc = 4;
}
// An AdminChangeReplicasResponse is the return value from the
// AdminChangeReplicas() method.
message AdminChangeReplicasResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Desc is the value of the range descriptor upon success.
  //
  // Added for the 19.1 release; it must remain optional until 19.2.
  // TODO(ajwerner): Make this non-nullable for 19.2.
  RangeDescriptor desc = 2;
}
// An AdminRelocateRangeRequest is the argument to the AdminRelocateRange()
// method. Relocates the replicas for a range to the specified target stores.
// The first store in the list of targets becomes the new leaseholder.
message AdminRelocateRangeRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Stores the range's replicas are relocated to. The first store in the
  // list becomes the new leaseholder.
  repeated ReplicationTarget targets = 2 [(gogoproto.nullable) = false];

  // TODO(a-robinson): Add "reason"/"details" string fields?
}
// An AdminRelocateRangeResponse is the return value from the
// AdminRelocateRange() method.
message AdminRelocateRangeResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A HeartbeatTxnRequest is arguments to the HeartbeatTxn()
// method. It's sent by transaction coordinators to let the system
// know that the transaction is still ongoing. Note that this
// heartbeat message is different from the heartbeat message in the
// gossip protocol.
message HeartbeatTxnRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Timestamp supplied with the heartbeat.
  // NOTE(review): presumably the sender's current time, recorded as the
  // transaction's latest heartbeat -- confirm against the handler.
  util.hlc.Timestamp now = 2 [(gogoproto.nullable) = false];
}
// A HeartbeatTxnResponse is the return value from the HeartbeatTxn()
// method. It returns the transaction info in the response header. The
// returned transaction lets the coordinator know the disposition of
// the transaction (i.e. aborted, committed, or pending).
message HeartbeatTxnResponse {
  // Response header. It carries the transaction info, which tells the
  // coordinator the transaction's disposition (aborted, committed, or
  // pending).
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A GCRequest is arguments to the GC() method. It's sent by range
// lease holders after scanning range data to find expired MVCC values.
message GCRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // GCKey pairs a key with a timestamp.
  // NOTE(review): presumably the timestamp bounds which MVCC versions of
  // the key are collected -- confirm against the GC implementation.
  message GCKey {
    option (gogoproto.equal) = true;

    bytes key = 1 [(gogoproto.casttype) = "Key"];
    util.hlc.Timestamp timestamp = 2 [(gogoproto.nullable) = false];
  }

  // The keys submitted for garbage collection.
  // NOTE(review): field number 2 is not used by this message; if it once
  // belonged to a removed field, consider reserving it.
  repeated GCKey keys = 3 [(gogoproto.nullable) = false];

  // Threshold is the expiration timestamp.
  util.hlc.Timestamp threshold = 4 [(gogoproto.nullable) = false];

  // TxnSpanGCThreshold is the timestamp below which inactive transactions
  // were considered for GC (and thus might have been removed).
  // TODO(nvanbenschoten): Remove this in 2.3, at which point we won't need
  // to update it because no nodes in the cluster will ever consult it.
  util.hlc.Timestamp txn_span_gc_threshold = 5 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "TxnSpanGCThreshold"
  ];
}
// A GCResponse is the return value from the GC() method.
message GCResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// PushTxnType determines what action to take when pushing a transaction.
enum PushTxnType {
  // Generated Go constants are emitted without the enum type name prefix.
  option (gogoproto.goproto_enum_prefix) = false;

  // Move the pushee's timestamp forward, if possible, so that a concurrent
  // reader can proceed.
  PUSH_TIMESTAMP = 0;

  // Abort the pushee, if possible, so that a concurrent writer can proceed.
  PUSH_ABORT = 1;

  // Abort the pushee only if it is abandoned; otherwise do not attempt to
  // mutate it.
  PUSH_TOUCH = 2;

  // Value 3 is reserved and must not be reused.
  reserved 3;
}
// A PushTxnRequest is arguments to the PushTxn() method. It's sent by
// readers or writers which have encountered an "intent" laid down by
// another transaction. The goal is to resolve the conflict. Note that
// args.Key should be set to the txn ID of args.PusheeTxn, not
// args.PusherTxn. This RPC is addressed to the range which owns the pushee's
// txn record.
//
// Resolution is trivial if the txn which owns the intent has either
// been committed or aborted already. Otherwise, the existing txn can
// either be aborted (for write/write conflicts), or its commit
// timestamp can be moved forward (for read/write conflicts). The
// course of action is determined by the specified push type, and by
// the owning txn's status and priority.
message PushTxnRequest {
  option (gogoproto.equal) = true;

  // Request header. The key should be set to the txn ID of the pushee
  // (not the pusher); the RPC is addressed to the range which owns the
  // pushee's txn record.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The transaction that encountered the intent, if there is one. A
  // non-transactional pusher sets only the priority (in particular, the ID
  // is left unset). Used to compare priorities, and timestamps when the
  // priorities are equal.
  Transaction pusher_txn = 2 [(gogoproto.nullable) = false];

  // The transaction to push, as recorded at the intent that triggered this
  // request. This may not be the most up-to-date value of the transaction
  // record, but it will be set or merged as appropriate.
  storage.engine.enginepb.TxnMeta pushee_txn = 3 [
    (gogoproto.nullable) = false
  ];

  // PushTo is the timestamp which PusheeTxn should be pushed to. During
  // conflict resolution it should be set just after the timestamp of the
  // conflicting read or write.
  util.hlc.Timestamp push_to = 4 [(gogoproto.nullable) = false];

  // InclusivePushTo is set by nodes to indicate that PushTo is itself the
  // timestamp they want the transaction pushed to, rather than the
  // timestamp before the one they want it pushed to. It exists to assist
  // the migration of the PushTo field.
  // TODO(nvanbenschoten): Remove this field in 19.2.
  bool inclusive_push_to = 9;

  // DeprecatedNow holds the timestamp that the pushee's last heartbeat is
  // compared against.
  //
  // The field remains for compatibility with 2.1 nodes. Users should set
  // this field and the batch header timestamp to the same value.
  // TODO(nvanbenschoten): Remove this field in 19.2.
  util.hlc.Timestamp deprecated_now = 5 [(gogoproto.nullable) = false];

  // Readers set this to PUSH_TIMESTAMP to move pushee_txn's provisional
  // commit timestamp forward. Writers set this to PUSH_ABORT to request
  // that pushee_txn be aborted if possible. Inconsistent readers set this
  // to PUSH_TOUCH to determine whether the pushee can be aborted due to
  // inactivity (based on the "now" timestamp; presumably deprecated_now
  // above -- TODO confirm).
  PushTxnType push_type = 6;

  // Forces the push: the normal expiration and priority checks in PushTxn
  // are overridden so that the pushee is either aborted or has its
  // timestamp pushed.
  bool force = 7;

  // Field number 8 is reserved and must not be reused.
  reserved 8;
}
// A PushTxnResponse is the return value from the PushTxn() method. It
// returns success and the resulting state of PusheeTxn if the
// conflict was resolved in favor of the caller; the caller should
// subsequently invoke ResolveIntent() on the conflicted key. It
// returns an error otherwise.
message PushTxnResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // pushee_txn is populated if the transaction was pushed, in which case
  // it contains the current value of the transaction.
  // TODO(tschottdorf): Maybe this can be a TxnMeta instead; probably
  // requires factoring out the new Priority.
  Transaction pushee_txn = 2 [(gogoproto.nullable) = false];
}
// A RecoverTxnRequest is arguments to the RecoverTxn() method. It is sent
// during the recovery process for a transaction abandoned in the STAGING state.
// The sender is expected to have queried all of the abandoned transaction's
// in-flight writes and determined whether they all succeeded or not. This is
// used to determine whether the result of the recovery should be committing the
// abandoned transaction or aborting it.
message RecoverTxnRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Transaction record to recover.
  storage.engine.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false];

  // Whether every write of the STAGING transaction succeeded. If true, the
  // transaction is implicitly committed, and the commit can be made
  // explicit by giving its record a COMMITTED status. If false, the
  // transaction can be aborted, provided that a write found to have failed
  // was prevented from ever succeeding in the future.
  bool implicitly_committed = 3;
}
// A RecoverTxnResponse is the return value from the RecoverTxn() method.
message RecoverTxnResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The finalized state of the recovered transaction.
  Transaction recovered_txn = 2 [(gogoproto.nullable) = false];
}
// A QueryTxnRequest is arguments to the QueryTxn() method. It's sent
// by transactions which are waiting to push another transaction because
// of conflicting write intents to fetch updates to either the pusher's
// or the pushee's transaction records.
message QueryTxnRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // Transaction record to query.
  storage.engine.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false];

  // When true, the query does not return until something changes: either
  // the transaction's status or priority, or the set of dependent
  // transactions.
  bool wait_for_update = 3;

  // Set of known dependent transactions, as transaction IDs (generated
  // code uses the uuid.UUID custom type).
  repeated bytes known_waiting_txns = 4 [
    (gogoproto.customtype) =
        "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"
  ];
}
// A QueryTxnResponse is the return value from the QueryTxn() method.
message QueryTxnResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The current state of the queried transaction. Empty if the queried
  // transaction record does not exist.
  Transaction queried_txn = 2 [(gogoproto.nullable) = false];

  // IDs of the transactions which are waiting on the queried txn
  // (generated code uses the uuid.UUID custom type).
  repeated bytes waiting_txns = 3 [
    (gogoproto.customtype) =
        "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"
  ];
}
// A QueryIntentRequest is arguments to the QueryIntent() method. It visits
// the specified key and checks whether an intent is present for the given
// transaction.
message QueryIntentRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The TxnMeta that the intent is expected to have. An intent is a match
  // if it could be committed by the provided transaction. Concretely, an
  // intent found at the specified key matches only if it has the same ID,
  // the same epoch, and a provisional commit timestamp that is equal to or
  // less than that in the provided transaction. For SERIALIZABLE
  // transactions, an intent whose timestamp is greater than that in the
  // provided transaction would prevent the transaction from committing and
  // is therefore not a match. For SNAPSHOT transactions, however, such an
  // intent would not prevent the transaction from committing and therefore
  // is a match.
  //
  // Additionally, the intent is only considered a match if its sequence
  // number is equal to or greater than the expected txn's sequence number.
  // The request doesn't require an exact sequence number match because the
  // transaction could have performed overlapping writes, in which case
  // only the latest sequence number will remain. We assume that if a
  // transaction has successfully written an intent at a larger sequence
  // number then it must have succeeded in writing an intent at the smaller
  // sequence number as well.
  storage.engine.enginepb.TxnMeta txn = 2 [(gogoproto.nullable) = false];

  // IfMissingBehavior selects what the request does when the expected
  // intent is not found.
  enum IfMissingBehavior {
    // Don't do anything special; just note in the response that the intent
    // was not found.
    DO_NOTHING = 0;

    // Return an IntentMissingError. Special-cased to return a SERIALIZABLE
    // retry error if a SERIALIZABLE transaction queries its own intent and
    // finds it has been pushed.
    RETURN_ERROR = 1;

    // Prevent the intent from ever being written in the future. With this
    // behavior, a response with found_intent=false implies that an intent
    // will never be writable at the key at the transaction's timestamp or
    // below.
    PREVENT = 2;
  }

  // The behavior of the request if the expected intent is found to be not
  // present.
  IfMissingBehavior if_missing = 3;
}
// A QueryIntentResponse is the return value from the QueryIntent() method.
message QueryIntentResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // True if an intent matching the expected transaction was found at the
  // key.
  bool found_intent = 2;
}
// A ResolveIntentRequest is arguments to the ResolveIntent()
// method. It is sent by transaction coordinators after successfully
// calling PushTxn to clean up write intents: either to remove, commit
// or move them forward in time.
message ResolveIntentRequest {
  option (gogoproto.equal) = true;

  // Request header; declared non-nullable and embedded via gogoproto.
  RequestHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];

  // The transaction whose intent is being resolved.
  storage.engine.enginepb.TxnMeta intent_txn = 2 [
    (gogoproto.nullable) = false
  ];

  // The status of the transaction.
  TransactionStatus status = 3;

  // Optionally poison the abort span for the transaction on the intent's
  // range.
  bool poison = 4;
}
// A ResolveIntentResponse is the return value from the
// ResolveIntent() method.
message ResolveIntentResponse {
  // Response header; declared non-nullable and embedded via gogoproto.
  ResponseHeader header = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.embed) = true
  ];
}
// A ResolveIntentRangeRequest is arguments to the ResolveIntentRange() method.