-
Notifications
You must be signed in to change notification settings - Fork 400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement committed entries pagination #356
Changes from 8 commits
3ef6aad
6b92f86
6c081ef
7bf8e65
b9a3621
2538e80
e7b4d25
8fbd9a2
9e3b839
b7c662e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -492,3 +492,168 @@ fn test_set_priority() { | |
assert_eq!(raw_node.raft.priority, p); | ||
} | ||
} | ||
|
||
// test_append_pagination ensures that no `MsgAppend` is ever sent whose entries' | ||
// payload exceeds the configured `max_size_per_msg`. | ||
// | ||
// It also checks that at least one `MsgAppend` carried more than half of the limit, | ||
// so the test cannot pass trivially by only ever seeing tiny messages. | ||
#[test] | ||
fn test_append_pagination() { | ||
use std::cell::Cell; | ||
use std::rc::Rc; | ||
let l = default_logger(); | ||
let mut config = new_test_config(1, 10, 1); | ||
let max_size_per_msg = 2048; | ||
config.max_size_per_msg = max_size_per_msg; | ||
let mut nt = Network::new_with_config(vec![None, None, None], &config, &l); | ||
// Flag shared between the message hook and the final assertion below. | ||
let seen_full_msg = Rc::new(Cell::new(false)); | ||
let b = seen_full_msg.clone(); | ||
// The hook inspects every message it is given; it always returns `true` | ||
// (presumably meaning "let the message through" -- confirm against `Network`). | ||
nt.msg_hook = Some(Box::new(move |m: &Message| -> bool { | ||
if m.msg_type == MessageType::MsgAppend { | ||
// Only the `data` payload of each entry is counted here, not the full entry size. | ||
let total_size = m.entries.iter().fold(0, |acc, e| acc + e.data.len()); | ||
if total_size as u64 > max_size_per_msg { | ||
panic!("sent MsgApp that is too large: {} bytes", total_size); | ||
} | ||
// Remember that a reasonably full message was observed. | ||
if total_size as u64 > max_size_per_msg / 2 { | ||
b.set(true); | ||
} | ||
} | ||
true | ||
})); | ||
// Send MsgHup to node 1 (presumably so that it campaigns and becomes leader). | ||
nt.send(vec![new_message(1, 1, MessageType::MsgHup, 0)]); | ||
// Isolate node 1, then propose five 1000-byte entries to it while it is cut off. | ||
nt.isolate(1); | ||
for _ in 0..5 { | ||
let data = "a".repeat(1000); | ||
nt.send(vec![new_message_with_entries( | ||
1, | ||
1, | ||
MessageType::MsgPropose, | ||
vec![new_entry(0, 0, Some(&data))], | ||
)]); | ||
} | ||
nt.recover(); | ||
// After the partition recovers, tick the clock to wake everything | ||
// back up and send the messages. | ||
nt.send(vec![new_message(1, 1, MessageType::MsgBeat, 0)]); | ||
// Sanity check on the test itself: the hook must have seen a large message. | ||
assert!( | ||
seen_full_msg.get(), | ||
"didn't see any messages more than half the max size; something is wrong with this test" | ||
); | ||
} | ||
|
||
// test_commit_pagination ensures that the committed entries handed out in a single | ||
// `Ready` are limited by `max_committed_size_per_ready`. | ||
// | ||
// With a limit of 2048 and three 1000-byte proposals, the test expects the | ||
// proposals to be delivered as a batch of two followed by a batch of one. | ||
#[test] | ||
fn test_commit_pagination() { | ||
let l = default_logger(); | ||
let storage = MemStorage::new_with_conf_state((vec![1], vec![])); | ||
let mut config = new_test_config(1, 10, 1); | ||
config.max_committed_size_per_ready = 2048; | ||
let mut raw_node = RawNode::new(&config, storage, &l).unwrap(); | ||
raw_node.campaign().unwrap(); | ||
// The first Ready after campaigning is expected to carry exactly one | ||
// (empty) committed entry. | ||
let rd = raw_node.ready(); | ||
let committed_len = rd.committed_entries.as_ref().unwrap().len(); | ||
assert_eq!( | ||
committed_len, 1, | ||
"expected 1 (empty) entry, got {}", | ||
committed_len | ||
); | ||
// Persist the Ready's entries into the store before advancing. | ||
raw_node.mut_store().wl().append(rd.entries()).unwrap(); | ||
raw_node.advance(rd); | ||
// Propose three entries of 1000 bytes each. | ||
let blob = "a".repeat(1000).into_bytes(); | ||
for _ in 0..3 { | ||
raw_node.propose(vec![], blob.clone()).unwrap(); | ||
} | ||
// The 3 proposals will commit in two batches. | ||
let rd = raw_node.ready(); | ||
let committed_len = rd.committed_entries.as_ref().unwrap().len(); | ||
assert_eq!( | ||
committed_len, 2, | ||
"expected 2 entries in first batch, got {}", | ||
committed_len | ||
); | ||
raw_node.mut_store().wl().append(rd.entries()).unwrap(); | ||
raw_node.advance(rd); | ||
|
||
// The remaining proposal is expected in the next Ready. | ||
let rd = raw_node.ready(); | ||
let committed_len = rd.committed_entries.as_ref().unwrap().len(); | ||
assert_eq!( | ||
committed_len, 1, | ||
"expected 1 entry in second batch, got {}", | ||
committed_len | ||
); | ||
raw_node.mut_store().wl().append(rd.entries()).unwrap(); | ||
raw_node.advance(rd); | ||
} | ||
|
||
// test_commit_pagination_after_restart is a regression test for a scenario in which | ||
// the Storage's entries size limitation is slightly more permissive than Raft's | ||
// internal one: | ||
// | ||
// - node learns that index 11 is committed | ||
// - next_entries returns index 1..10 in committed_entries (but index 10 already | ||
// exceeds the size limit), which isn't noticed internally by Raft | ||
// - Commit index gets bumped to 10 | ||
// - the node persists the HardState, but crashes before applying the entries | ||
// - upon restart, the storage returns the same entries, but `slice` takes a | ||
// different code path and removes the last entry. | ||
// - Raft does not emit a HardState, but when the app calls advance(), it bumps | ||
// its internal applied index cursor to 10 (when it should be 9) | ||
// - the next Ready asks the app to apply index 11 (omitting index 10), losing a | ||
// write. | ||
#[test] | ||
fn test_commit_pagination_after_restart() { | ||
// Simulate the state persisted before the crash: term 1, vote 1, commit 10. | ||
let mut persisted_hard_state = HardState::default(); | ||
persisted_hard_state.set_term(1); | ||
persisted_hard_state.set_vote(1); | ||
persisted_hard_state.set_commit(10); | ||
let s = IgnoreSizeHintMemStorage::default(); | ||
s.inner.wl().set_hardstate(persisted_hard_state); | ||
// Build ten entries with `new_entry(1, i + 1, Some("a"))` and accumulate | ||
// their `compute_size()` into `size`. | ||
let ents_count = 10; | ||
let mut ents = Vec::with_capacity(ents_count); | ||
let mut size = 0u64; | ||
for i in 0..ents_count as u64 { | ||
let e = new_entry(1, i + 1, Some("a")); | ||
size += u64::from(e.compute_size()); | ||
ents.push(e); | ||
} | ||
s.inner.wl().append(&ents).unwrap(); | ||
|
||
||
let mut cfg = new_test_config(1, 10, 1); | ||
// Set a max_size_per_msg that would suggest to Raft that the last committed entry should | ||
// not be included in the initial rd.committed_entries. However, our storage will ignore | ||
// this and *will* return it (which is how the Commit index ended up being 10 initially). | ||
// NOTE(review): the loop below asserts on `committed_entries`; confirm whether | ||
// `max_committed_size_per_ready` is the limit that should be set here. | ||
cfg.max_size_per_msg = size - 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, the etcd codes of this test case are kind of buggy because the And since Though I found I should use |
||
|
||
// Append an eleventh entry on top of the ten already stored. | ||
s.inner | ||
.wl() | ||
.append(&[new_entry(1, 11, Some("boom"))]) | ||
.unwrap(); | ||
let mut raw_node = RawNode::with_default_logger(&cfg, s).unwrap(); | ||
// Keep draining Readys until index 11 has been handed out for application. | ||
let mut highest_applied = 0; | ||
while highest_applied != 11 { | ||
let rd = raw_node.ready(); | ||
let committed_entries = rd.committed_entries.clone().unwrap(); | ||
// Every Ready in this loop must make progress. | ||
assert!( | ||
!committed_entries.is_empty(), | ||
"stop applying entries at index {}", | ||
highest_applied | ||
); | ||
let next = committed_entries.first().unwrap().get_index(); | ||
// From the second iteration on, the batch must start right after the | ||
// last index that was handed out; otherwise an entry was skipped. | ||
if highest_applied != 0 { | ||
assert_eq!( | ||
highest_applied + 1, | ||
next, | ||
"attempting to apply index {} after index {}, leaving a gap", | ||
next, | ||
highest_applied | ||
) | ||
} | ||
// Record the index of the last entry in this batch. | ||
highest_applied = rd | ||
.committed_entries | ||
.as_ref() | ||
.unwrap() | ||
.last() | ||
.unwrap() | ||
.get_index(); | ||
raw_node.advance(rd); | ||
// node learns commit index is 11 | ||
raw_node.raft.r.raft_log.commit_to(11); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test case doesn't seem to be the same as upstream.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It silently differs a little from the one in etcd because of the differences in
MemStorage