Skip to content

Commit

Permalink
feat(page_service): timeout-based batching of requests (#9321)
Browse files Browse the repository at this point in the history
## Problem

We don't take advantage of queue depth generated by the compute
on the pageserver. We can process getpage requests more efficiently
by batching them. 

## Summary of changes

Batch up incoming getpage requests that arrive within a configurable
time window (`server_side_batch_timeout`).
Then process the entire batch via one `get_vectored` timeline operation.
By default, no merging takes place.

## Testing

* **Functional**: #9792
* **Performance**: will be done in staging/pre-prod

# Refs

* #9377
* #9376

Co-authored-by: Christian Schwarz <christian@neon.tech>
  • Loading branch information
VladLazar and problame authored Nov 18, 2024
1 parent e5c89f3 commit d7662fd
Show file tree
Hide file tree
Showing 9 changed files with 705 additions and 195 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions libs/pageserver_api/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ pub struct ConfigToml {
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
#[serde(skip_serializing_if = "Option::is_none")]
pub no_sync: Option<bool>,
#[serde(with = "humantime_serde")]
pub server_side_batch_timeout: Option<Duration>,
}

#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
Expand Down Expand Up @@ -317,6 +319,8 @@ pub mod defaults {
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

pub const DEFAULT_SERVER_SIDE_BATCH_TIMEOUT: Option<&str> = None;
}

impl Default for ConfigToml {
Expand Down Expand Up @@ -397,6 +401,8 @@ impl Default for ConfigToml {
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
l0_flush: None,
virtual_file_io_mode: None,
server_side_batch_timeout: DEFAULT_SERVER_SIDE_BATCH_TIMEOUT
.map(|duration| humantime::parse_duration(duration).unwrap()),
tenant_config: TenantConfigToml::default(),
no_sync: None,
}
Expand Down
3 changes: 3 additions & 0 deletions libs/postgres_backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
Ok(())
}

// Proto looks like this:
// FeMessage::Query("pagestream_v2{FeMessage::CopyData(PagesetreamFeMessage::GetPage(..))}")

async fn process_message(
&mut self,
handler: &mut impl Handler<IO>,
Expand Down
1 change: 1 addition & 0 deletions pageserver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ enumset = { workspace = true, features = ["serde"]}
strum.workspace = true
strum_macros.workspace = true
wal_decoder.workspace = true
smallvec.workspace = true

[target.'cfg(target_os = "linux")'.dependencies]
procfs.workspace = true
Expand Down
6 changes: 6 additions & 0 deletions pageserver/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ pub struct PageServerConf {

/// Optionally disable disk syncs (unsafe!)
pub no_sync: bool,

/// Maximum amount of time for which a get page request request
/// might be held up for request merging.
pub server_side_batch_timeout: Option<Duration>,
}

/// Token for authentication to safekeepers
Expand Down Expand Up @@ -336,6 +340,7 @@ impl PageServerConf {
concurrent_tenant_warmup,
concurrent_tenant_size_logical_size_queries,
virtual_file_io_engine,
server_side_batch_timeout,
tenant_config,
no_sync,
} = config_toml;
Expand Down Expand Up @@ -377,6 +382,7 @@ impl PageServerConf {
image_compression,
timeline_offloading,
ephemeral_bytes_per_memory_kb,
server_side_batch_timeout,

// ------------------------------------------------------------
// fields that require additional validation or custom handling
Expand Down
21 changes: 17 additions & 4 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,7 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
ctx: &'c RequestContext,
start: std::time::Instant,
op: SmgrQueryType,
count: usize,
}

impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
Expand Down Expand Up @@ -1214,10 +1215,13 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
elapsed
}
};
self.global_latency_histo
.observe(ex_throttled.as_secs_f64());
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());

for _ in 0..self.count {
self.global_latency_histo
.observe(ex_throttled.as_secs_f64());
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
}
}
}
}
Expand Down Expand Up @@ -1385,6 +1389,14 @@ impl SmgrQueryTimePerTimeline {
&'a self,
op: SmgrQueryType,
ctx: &'c RequestContext,
) -> Option<impl Drop + 'a> {
self.start_timer_many(op, 1, ctx)
}
pub(crate) fn start_timer_many<'c: 'a, 'a>(
&'a self,
op: SmgrQueryType,
count: usize,
ctx: &'c RequestContext,
) -> Option<impl Drop + 'a> {
let start = Instant::now();

Expand Down Expand Up @@ -1422,6 +1434,7 @@ impl SmgrQueryTimePerTimeline {
ctx,
start,
op,
count,
})
}
}
Expand Down
Loading

1 comment on commit d7662fd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5590 tests run: 5352 passed, 2 failed, 236 skipped (full report)


Failures on Postgres 16

# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_sharded_ingest[release-pg16-github-actions-selfhosted-1] or test_compaction_l0_memory[release-pg16-github-actions-selfhosted]"

Code coverage* (full report)

  • functions: 31.5% (7936 of 25181 functions)
  • lines: 49.6% (62944 of 126883 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
d7662fd at 2024-11-18T22:03:21.614Z :recycle:

Please sign in to comment.