-
Notifications
You must be signed in to change notification settings - Fork 709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable parallel key scraping #1985
Merged
Merged
Changes from 5 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
b757b99
enable parallel scraping
eagr d39f72e
glitch
eagr be9a976
Merge branch 'master' into para-scraping
liamaharon 16e0e0d
split fn
eagr 48701a3
get parallel
eagr ea33098
rewrite rpc_get_keys_parallel()
eagr 4be72d8
parallel with start key instead of prefix
eagr 775b037
avoid unnecessary clones
eagr 9881284
make start key optional
eagr 05ebb51
thanks clippy
eagr 8250746
divide workload based on suffix size
eagr 981a54c
Merge branch 'master' into para-scraping
eagr 5043d04
bench different number of chunks
eagr 64db2e5
add rpc_get_keys_paged bench
liamaharon 605cd8d
clean up
eagr 5d2392d
as per advices
eagr 01f0f77
Merge branch 'master' into para-scraping
liamaharon 6b1e5f3
Merge branch 'master' into para-scraping
ggwpez 3530766
link followup
eagr 45e4dfb
Merge branch 'master' into para-scraping
liamaharon 8ca5469
Merge branch 'master' into para-scraping
liamaharon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -439,6 +439,37 @@ where | |
Ok(keys) | ||
} | ||
|
||
/// Get keys at `prefix` in `block` in parallel manner. | ||
async fn rpc_get_keys_parallel( | ||
&self, | ||
prefix: StorageKey, | ||
block: B::Hash, | ||
parallel: u16, | ||
) -> Result<Vec<StorageKey>, &'static str> { | ||
let prefixes = extend_prefix(&prefix, parallel); | ||
let batch = prefixes.into_iter().map(|prefix| self.rpc_get_keys_paged(prefix, block)); | ||
|
||
let keys = futures::future::join_all(batch) | ||
.await | ||
.into_iter() | ||
.filter_map(|res| match res { | ||
Ok(keys) => Some(keys), | ||
Err(err) => { | ||
log::warn!( | ||
target: LOG_TARGET, | ||
"{} when fetching keys at block {:?}", | ||
err, | ||
block, | ||
); | ||
None | ||
}, | ||
}) | ||
.flatten() | ||
.collect::<Vec<StorageKey>>(); | ||
|
||
Ok(keys) | ||
} | ||
|
||
/// Fetches storage data from a node using a dynamic batch size. | ||
/// | ||
/// This function adjusts the batch size on the fly to help prevent overwhelming the node with | ||
|
@@ -590,16 +621,17 @@ where | |
/// map them to values one by one. | ||
/// | ||
/// This can work with public nodes. But, expect it to be darn slow. | ||
pub(crate) async fn rpc_get_pairs_paged( | ||
pub(crate) async fn rpc_get_pairs( | ||
&self, | ||
prefix: StorageKey, | ||
at: B::Hash, | ||
pending_ext: &mut TestExternalities<HashingFor<B>>, | ||
parallel: u16, | ||
) -> Result<Vec<KeyValue>, &'static str> { | ||
let start = Instant::now(); | ||
let mut sp = Spinner::with_timer(Spinners::Dots, "Scraping keys...".into()); | ||
let keys = self | ||
.rpc_get_keys_paged(prefix.clone(), at) | ||
.rpc_get_keys_parallel(prefix.clone(), at, parallel) | ||
.await? | ||
.into_iter() | ||
.collect::<Vec<_>>(); | ||
|
@@ -759,6 +791,64 @@ where | |
} | ||
} | ||
|
||
// Create a batch of storage key prefixes each starting with `prefix`, meant to be used for key | ||
// scraping. Given the prefix 00, the return can be 000-00F or 0000-00FF, depending on `size`. | ||
// `size` will be rounded to power of 16 if not already, so is the returned batch size. | ||
fn extend_prefix(prefix: &StorageKey, size: u16) -> Vec<StorageKey> { | ||
const MAX_EXT_LEN: usize = 3; | ||
const MAX_BATCH_SIZE: u16 = 16u16.pow(MAX_EXT_LEN as u32); | ||
const POW_OF_SIXTEEN: [u16; MAX_EXT_LEN] = [1, 16, 256]; | ||
|
||
// round to power of 16 | ||
// up to MAX_BATCH_SIZE | ||
fn round(n: u16) -> (u16, usize) { | ||
if n <= 1 { | ||
return (1, 0) | ||
} else if n <= 16 { | ||
return (16, 1) | ||
} | ||
|
||
let mut pow: u16 = 16; | ||
let mut exp: usize = 1; | ||
|
||
while pow < n { | ||
if pow == MAX_BATCH_SIZE { | ||
break | ||
} | ||
|
||
pow = pow.saturating_mul(16); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What stops |
||
exp += 1; | ||
} | ||
|
||
debug_assert!(pow <= MAX_BATCH_SIZE); | ||
debug_assert!(exp <= MAX_EXT_LEN); | ||
|
||
// round down if below threshold | ||
if n * 4 <= pow { | ||
(pow / 16, exp - 1) | ||
} else { | ||
(pow, exp) | ||
} | ||
} | ||
|
||
let (size, len) = round(size); | ||
let mut ext = vec![0; len]; | ||
|
||
(0..size) | ||
.map(|idx| { | ||
// 0-f | 00-ff | 000-fff | ||
// relatively static, use OnceCell if turned out to be hot | ||
for i in 0..len { | ||
ext[len - i - 1] = (idx / POW_OF_SIXTEEN[i] % 16) as u8; | ||
} | ||
|
||
let mut prefix = prefix.as_ref().to_vec(); | ||
prefix.extend(&ext); | ||
StorageKey(prefix) | ||
}) | ||
.collect() | ||
} | ||
|
||
impl<B: BlockT + DeserializeOwned> Builder<B> | ||
where | ||
B::Hash: DeserializeOwned, | ||
|
@@ -846,7 +936,7 @@ where | |
for prefix in &config.hashed_prefixes { | ||
let now = std::time::Instant::now(); | ||
let additional_key_values = | ||
self.rpc_get_pairs_paged(StorageKey(prefix.to_vec()), at, pending_ext).await?; | ||
self.rpc_get_pairs(StorageKey(prefix.to_vec()), at, pending_ext, 16).await?; | ||
let elapsed = now.elapsed(); | ||
log::info!( | ||
target: LOG_TARGET, | ||
|
@@ -1440,4 +1530,34 @@ mod remote_tests { | |
.unwrap() | ||
.execute_with(|| {}); | ||
} | ||
|
||
#[test]
fn prefixes_for_scraping_keys() {
	let prefix = StorageKey(vec![0, 0]);

	// Sizes of 0 and 1 leave the prefix untouched.
	assert_eq!(extend_prefix(&prefix, 0), vec![StorageKey(vec![0, 0])]);
	assert_eq!(extend_prefix(&prefix, 1), vec![StorageKey(vec![0, 0])]);

	// A size of 16 appends a single nibble: 0x0 through 0xf.
	let one_nibble: Vec<_> = (0..16).map(|i| StorageKey(vec![0, 0, i])).collect();
	assert_eq!(extend_prefix(&prefix, 16), one_nibble);

	// A size of 256 appends two nibbles: 0x00 through 0xff.
	let prefixes = extend_prefix(&prefix, 256);
	let two_nibbles: Vec<_> = (0..256u32)
		.map(|i| StorageKey(vec![0, 0, (i / 16 % 16) as u8, (i % 16) as u8]))
		.collect();
	assert_eq!(prefixes, two_nibbles);
	// Spot-check the ordering at the boundaries.
	assert_eq!(prefixes[0], StorageKey(vec![0, 0, 0, 0]));
	assert_eq!(prefixes[1], StorageKey(vec![0, 0, 0, 1]));
	assert_eq!(prefixes[15], StorageKey(vec![0, 0, 0, 15]));
	assert_eq!(prefixes[16], StorageKey(vec![0, 0, 1, 0]));
	assert_eq!(prefixes[254], StorageKey(vec![0, 0, 15, 14]));
	assert_eq!(prefixes[255], StorageKey(vec![0, 0, 15, 15]));

	// A size of 4096 appends three nibbles: 0x000 through 0xfff.
	let prefixes = extend_prefix(&prefix, 4096);
	let three_nibbles: Vec<_> = (0..4096u32)
		.map(|i| {
			StorageKey(vec![0, 0, (i / 256 % 16) as u8, (i / 16 % 16) as u8, (i % 16) as u8])
		})
		.collect();
	assert_eq!(prefixes, three_nibbles);
	assert_eq!(prefixes[0], StorageKey(vec![0, 0, 0, 0, 0]));
	assert_eq!(prefixes[1], StorageKey(vec![0, 0, 0, 0, 1]));
	assert_eq!(prefixes[4094], StorageKey(vec![0, 0, 15, 15, 14]));
	assert_eq!(prefixes[4095], StorageKey(vec![0, 0, 15, 15, 15]));

	// Non-power-of-16 sizes are rounded to the nearest power of 16.
	assert_eq!(extend_prefix(&prefix, 2), extend_prefix(&prefix, 16));
	assert_eq!(extend_prefix(&prefix, 65), extend_prefix(&prefix, 256));
	assert_eq!(extend_prefix(&prefix, 1025), extend_prefix(&prefix, 4096));
}
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.