Skip to content

Commit

Permalink
pageserver: fixes for layer path changes (#7786)
Browse files Browse the repository at this point in the history
## Problem

- When a layer with legacy local path format is evicted and then
re-downloaded, a panic happened because the path downloaded by remote
storage didn't match the path stored in Layer.
- While investigating, I also realized that secondary locations would
have a similar issue with evictions.

Closes: #7783 

## Summary of changes

- Make remote timeline client take local paths as an input: it should
not have its own ideas about local paths, instead it just uses the layer
path that the Layer has.
- Make secondary state store an explicit local path, populated on scan
of local disk at startup. This provides the same behavior as for Layer,
that our local_layer_path is a _default_, but the layer path can
actually be anything (e.g. an old style one).
- Add tests for both cases.
  • Loading branch information
jcsp authored May 17, 2024
1 parent c1390bf commit a8e6d25
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 90 deletions.
8 changes: 1 addition & 7 deletions pageserver/src/disk_usage_eviction_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,17 +535,11 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
}
EvictionLayer::Secondary(layer) => {
let file_size = layer.metadata.file_size();
let tenant_manager = tenant_manager.clone();

js.spawn(async move {
layer
.secondary_tenant
.evict_layer(
tenant_manager.get_conf(),
layer.timeline_id,
layer.name,
layer.metadata,
)
.evict_layer(layer.timeline_id, layer.name)
.await;
Ok(file_size)
});
Expand Down
2 changes: 2 additions & 0 deletions pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,7 @@ impl RemoteTimelineClient {
&self,
layer_file_name: &LayerName,
layer_metadata: &LayerFileMetadata,
local_path: &Utf8Path,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> anyhow::Result<u64> {
Expand All @@ -536,6 +537,7 @@ impl RemoteTimelineClient {
self.timeline_id,
layer_file_name,
layer_metadata,
local_path,
cancel,
ctx,
)
Expand Down
11 changes: 2 additions & 9 deletions pageserver/src/tenant/remote_timeline_client/download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
use crate::tenant::storage_layer::layer::local_layer_path;
use crate::tenant::storage_layer::LayerName;
use crate::tenant::Generation;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
Expand Down Expand Up @@ -50,19 +49,13 @@ pub async fn download_layer_file<'a>(
timeline_id: TimelineId,
layer_file_name: &'a LayerName,
layer_metadata: &'a LayerFileMetadata,
local_path: &Utf8Path,
cancel: &CancellationToken,
ctx: &RequestContext,
) -> Result<u64, DownloadError> {
debug_assert_current_span_has_tenant_and_timeline_id();

let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);
let local_path = local_layer_path(
conf,
&tenant_shard_id,
&timeline_id,
layer_file_name,
&layer_metadata.generation,
);

let remote_path = remote_layer_path(
&tenant_shard_id.tenant_id,
Expand All @@ -82,7 +75,7 @@ pub async fn download_layer_file<'a>(
// For more context about durable_rename check this email from postgres mailing list:
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
let temp_file_path = path_with_suffix_extension(&local_path, TEMP_DOWNLOAD_EXTENSION);
let temp_file_path = path_with_suffix_extension(local_path, TEMP_DOWNLOAD_EXTENSION);

let bytes_amount = download_retry(
|| async { download_object(storage, &remote_path, &temp_file_path, cancel, ctx).await },
Expand Down
54 changes: 11 additions & 43 deletions pageserver/src/tenant/secondary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ mod scheduler;
use std::{sync::Arc, time::SystemTime};

use crate::{
config::PageServerConf,
context::RequestContext,
disk_usage_eviction_task::DiskUsageEvictionInfo,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
virtual_file::MaybeFatalIo,
};

use self::{
Expand All @@ -21,9 +19,8 @@ use self::{
use super::{
config::{SecondaryLocationConfig, TenantConfOpt},
mgr::TenantManager,
remote_timeline_client::LayerFileMetadata,
span::debug_assert_current_span_has_tenant_id,
storage_layer::{layer::local_layer_path, LayerName},
storage_layer::LayerName,
};

use pageserver_api::{
Expand Down Expand Up @@ -178,13 +175,7 @@ impl SecondaryTenant {

/// Cancellation safe, but on cancellation the eviction will go through
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline_id, name=%name))]
pub(crate) async fn evict_layer(
self: &Arc<Self>,
conf: &PageServerConf,
timeline_id: TimelineId,
name: LayerName,
metadata: LayerFileMetadata,
) {
pub(crate) async fn evict_layer(self: &Arc<Self>, timeline_id: TimelineId, name: LayerName) {
debug_assert_current_span_has_tenant_id();

let guard = match self.gate.enter() {
Expand All @@ -197,41 +188,11 @@ impl SecondaryTenant {

let now = SystemTime::now();

let local_path = local_layer_path(
conf,
&self.tenant_shard_id,
&timeline_id,
&name,
&metadata.generation,
);

let this = self.clone();

// spawn it to be cancellation safe
tokio::task::spawn_blocking(move || {
let _guard = guard;
// We tolerate ENOENT, because between planning eviction and executing
// it, the secondary downloader could have seen an updated heatmap that
// resulted in a layer being deleted.
// Other local I/O errors are process-fatal: these should never happen.
let deleted = std::fs::remove_file(local_path);

let not_found = deleted
.as_ref()
.is_err_and(|x| x.kind() == std::io::ErrorKind::NotFound);

let deleted = if not_found {
false
} else {
deleted
.map(|()| true)
.fatal_err("Deleting layer during eviction")
};

if !deleted {
// skip updating accounting and putting perhaps later timestamp
return;
}

// Update the timeline's state. This does not have to be synchronized with
// the download process, because:
Expand All @@ -250,8 +211,15 @@ impl SecondaryTenant {
// of the cache.
let mut detail = this.detail.lock().unwrap();
if let Some(timeline_detail) = detail.timelines.get_mut(&timeline_id) {
timeline_detail.on_disk_layers.remove(&name);
timeline_detail.evicted_at.insert(name, now);
let removed = timeline_detail.on_disk_layers.remove(&name);

// We might race with removal of the same layer during downloads, if it was removed
// from the heatmap. If we see that the OnDiskState is gone, then no need to
// do a physical deletion or store in evicted_at.
if let Some(removed) = removed {
removed.remove_blocking();
timeline_detail.evicted_at.insert(name, now);
}
}
})
.await
Expand Down
47 changes: 36 additions & 11 deletions pageserver/src/tenant/secondary/downloader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ struct SecondaryDownloader {
pub(super) struct OnDiskState {
metadata: LayerFileMetadata,
access_time: SystemTime,
local_path: Utf8PathBuf,
}

impl OnDiskState {
Expand All @@ -121,12 +122,26 @@ impl OnDiskState {
_ame: LayerName,
metadata: LayerFileMetadata,
access_time: SystemTime,
local_path: Utf8PathBuf,
) -> Self {
Self {
metadata,
access_time,
local_path,
}
}

// This is infallible, because all errors are either acceptable (ENOENT), or totally
// unexpected (fatal).
pub(super) fn remove_blocking(&self) {
// We tolerate ENOENT, because between planning eviction and executing
// it, the secondary downloader could have seen an updated heatmap that
// resulted in a layer being deleted.
// Other local I/O errors are process-fatal: these should never happen.
std::fs::remove_file(&self.local_path)
.or_else(fs_ext::ignore_not_found)
.fatal_err("Deleting secondary layer")
}
}

#[derive(Debug, Clone, Default)]
Expand Down Expand Up @@ -816,28 +831,20 @@ impl<'a> TenantDownloader<'a> {
if cfg!(debug_assertions) {
// Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
// are already present on disk are really there.
let local_path = local_layer_path(
self.conf,
tenant_shard_id,
&timeline.timeline_id,
&layer.name,
&layer.metadata.generation,
);

match tokio::fs::metadata(&local_path).await {
match tokio::fs::metadata(&on_disk.local_path).await {
Ok(meta) => {
tracing::debug!(
"Layer {} present at {}, size {}",
layer.name,
local_path,
on_disk.local_path,
meta.len(),
);
}
Err(e) => {
tracing::warn!(
"Layer {} not found at {} ({})",
layer.name,
local_path,
on_disk.local_path,
e
);
debug_assert!(false);
Expand Down Expand Up @@ -926,13 +933,21 @@ impl<'a> TenantDownloader<'a> {
v.get_mut().access_time = t.access_time;
}
Entry::Vacant(e) => {
let local_path = local_layer_path(
self.conf,
tenant_shard_id,
&timeline.timeline_id,
&t.name,
&t.metadata.generation,
);
e.insert(OnDiskState::new(
self.conf,
tenant_shard_id,
&timeline.timeline_id,
t.name,
LayerFileMetadata::from(&t.metadata),
t.access_time,
local_path,
));
}
}
Expand All @@ -955,6 +970,14 @@ impl<'a> TenantDownloader<'a> {
&self.secondary_state.cancel
);

let local_path = local_layer_path(
self.conf,
tenant_shard_id,
timeline_id,
&layer.name,
&layer.metadata.generation,
);

// Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
let downloaded_bytes = match download_layer_file(
self.conf,
Expand All @@ -963,6 +986,7 @@ impl<'a> TenantDownloader<'a> {
*timeline_id,
&layer.name,
&LayerFileMetadata::from(&layer.metadata),
&local_path,
&self.secondary_state.cancel,
ctx,
)
Expand Down Expand Up @@ -1116,6 +1140,7 @@ async fn init_timeline_state(
name,
LayerFileMetadata::from(&remote_meta.metadata),
remote_meta.access_time,
file_path,
),
);
}
Expand Down
1 change: 1 addition & 0 deletions pageserver/src/tenant/storage_layer/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,7 @@ impl LayerInner {
.download_layer_file(
&self.desc.layer_name(),
&self.metadata(),
&self.path,
&timeline.cancel,
ctx,
)
Expand Down
Loading

1 comment on commit a8e6d25

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3153 tests run: 3013 passed, 0 failed, 140 skipped (full report)


Code coverage* (full report)

  • functions: 31.5% (6350 of 20188 functions)
  • lines: 47.4% (47942 of 101051 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
a8e6d25 at 2024-05-17T13:48:54.855Z :recycle:

Please sign in to comment.