Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support tenant manifests in the scrubber #9942

Merged
merged 16 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2564,9 +2564,9 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
}

/// Given the key of a tenant manifest, parse out the generation number
pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
static RE: OnceLock<Regex> = OnceLock::new();
let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap());
let re = RE.get_or_init(|| Regex::new(r".*tenant-manifest-([0-9a-f]{8}).json").unwrap());
problame marked this conversation as resolved.
Show resolved Hide resolved
re.captures(path.get_path().as_str())
.and_then(|c| c.get(1))
.and_then(|m| Generation::parse_suffix(m.as_str()))
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/remote_timeline_client/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl TenantManifest {
offloaded_timelines: vec![],
}
}
pub(crate) fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
serde_json::from_slice::<Self>(bytes)
}

Expand Down
139 changes: 138 additions & 1 deletion storage_scrubber/src/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@ use itertools::Itertools;
use pageserver::tenant::checks::check_valid_layermap;
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver::tenant::remote_timeline_client::manifest::TenantManifest;
use pageserver_api::shard::ShardIndex;
use tokio_util::sync::CancellationToken;
use tracing::{info, warn};
use utils::generation::Generation;
use utils::id::TimelineId;
use utils::shard::TenantShardId;

use crate::cloud_admin_api::BranchData;
use crate::metadata_stream::stream_listing;
use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId};
use futures_util::StreamExt;
use pageserver::tenant::remote_timeline_client::{parse_remote_index_path, remote_layer_path};
use pageserver::tenant::remote_timeline_client::{
parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path,
};
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::IndexPart;
use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath};
Expand Down Expand Up @@ -527,3 +531,136 @@ async fn list_timeline_blobs_impl(
unknown_keys,
}))
}

/// Summary of the tenant manifest objects found in remote storage for one tenant shard,
/// as collected by `list_tenant_manifests`.
pub(crate) struct RemoteTenantManifestInfo {
/// Highest generation among `manifests`; `None` when no manifest object was listed.
pub(crate) latest_generation: Option<Generation>,
/// Every listed object whose key parsed as a tenant manifest, paired with the
/// generation parsed from that key.
pub(crate) manifests: Vec<(Generation, ListingObject)>,
/// Listed objects whose key could not be parsed as a tenant manifest key.
// NOTE(review): no reader of this field is visible here, hence the lint allowance;
// presumably kept for diagnostics — confirm before removing.
#[allow(dead_code)]
pub(crate) unknown_keys: Vec<ListingObject>,
}

/// Outcome of `list_tenant_manifests`: what was found, plus any problems hit on the way.
pub(crate) struct ListTenantManifestResult {
/// Human-readable descriptions of failures to download or parse the latest manifest.
/// Empty when no manifest exists or the latest manifest was fetched and parsed cleanly.
pub(crate) errors: Vec<String>,
/// The manifest objects (and unrecognised keys) that the listing produced.
pub(crate) manifest_info: RemoteTenantManifestInfo,
}

/// Returns [`ListTimelineBlobsResult::MissingIndexPart`] if blob data has layer files
/// but is missing [`IndexPart`], otherwise returns [`ListTimelineBlobsResult::Ready`].
pub(crate) async fn list_tenant_manifests(
problame marked this conversation as resolved.
Show resolved Hide resolved
remote_client: &GenericRemoteStorage,
tenant_id: TenantShardId,
root_target: &RootTarget,
) -> anyhow::Result<ListTenantManifestResult> {
let mut errors = Vec::new();
let mut unknown_keys = Vec::new();

let mut tenant_root_target = root_target.tenant_root(&tenant_id);
const TENANT_MANIFEST_STEM: &str = "tenant-manifest";
let original_prefix = tenant_root_target.prefix_in_bucket.clone();
tenant_root_target.prefix_in_bucket += TENANT_MANIFEST_STEM;
tenant_root_target.delimiter = String::new();

let mut manifests: Vec<(Generation, ListingObject)> = Vec::new();

let prefix_str = &original_prefix
.strip_prefix("/")
.unwrap_or(&original_prefix);

let mut stream = std::pin::pin!(stream_listing(remote_client, &tenant_root_target));
'outer: while let Some(obj) = stream.next().await {
let (key, Some(obj)) = obj? else {
panic!("ListingObject not specified");
};

let blob_name = key.get_path().as_str().strip_prefix(prefix_str);
'err: {
// TODO a let chain would be nicer here.
let Some(name) = blob_name else {
break 'err;
};
if !name.starts_with("tenant-manifest") {
problame marked this conversation as resolved.
Show resolved Hide resolved
break 'err;
}
let Ok(path) = RemotePath::from_string(name) else {
break 'err;
};
let Some(generation) = parse_remote_tenant_manifest_path(path) else {
break 'err;
};
problame marked this conversation as resolved.
Show resolved Hide resolved
tracing::debug!("tenant manifest {key}");
manifests.push((generation, obj));
continue 'outer;
}
tracing::info!("Listed an unknown key: {key}");
unknown_keys.push(obj);
}

if manifests.is_empty() {
tracing::debug!("No manifest for timeline.");

return Ok(ListTenantManifestResult {
errors,
manifest_info: RemoteTenantManifestInfo {
latest_generation: None,
manifests,
unknown_keys,
},
});
}

// Find the manifest with the highest generation
let (latest_generation, latest_listing_object) = manifests
.iter()
.max_by_key(|i| i.0)
.map(|(g, obj)| (*g, obj.clone()))
.unwrap();
problame marked this conversation as resolved.
Show resolved Hide resolved

let manifest_bytes =
match download_object_with_retries(remote_client, &latest_listing_object.key).await {
Ok(bytes) => bytes,
Err(e) => {
// It is possible that the tenant gets deleted in-between we list the objects
// and we download the manifest file.
errors.push(format!("failed to download tenant-manifest.json: {e}"));
return Ok(ListTenantManifestResult {
errors,
manifest_info: RemoteTenantManifestInfo {
latest_generation: Some(latest_generation),
manifests,
unknown_keys,
},
});
}
};

match TenantManifest::from_json_bytes(&manifest_bytes) {
Ok(_manifest) => {
return Ok(ListTenantManifestResult {
errors,
manifest_info: RemoteTenantManifestInfo {
latest_generation: Some(latest_generation),
manifests,
unknown_keys,
},
});
}
Err(parse_error) => errors.push(format!(
"tenant-manifest.json body parsing error: {parse_error}"
)),
problame marked this conversation as resolved.
Show resolved Hide resolved
}

if errors.is_empty() {
errors.push(
"Unexpected: no errors did not lead to a successfully parsed blob return".to_string(),
);
}

Ok(ListTenantManifestResult {
errors,
manifest_info: RemoteTenantManifestInfo {
latest_generation: Some(latest_generation),
manifests,
unknown_keys,
},
})
}
Loading
Loading