Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete tenant's data from s3 #4855

Merged
merged 29 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
8cac369
Implement tenant deletion
LizardWizzard Jul 27, 2023
bb21390
Merge branch 'main' into dkr/tenant-delete
LizardWizzard Aug 1, 2023
2ed3602
Update pageserver/src/http/routes.rs
LizardWizzard Aug 1, 2023
d0f8549
Update pageserver/src/tenant/delete.rs
LizardWizzard Aug 1, 2023
5a42259
more review feedback
LizardWizzard Aug 1, 2023
f6530fc
fix remote mark path to be relative
LizardWizzard Aug 1, 2023
44672d2
Merge branch 'main' into dkr/tenant-delete
LizardWizzard Aug 1, 2023
a129bce
fix comments
LizardWizzard Aug 1, 2023
d7d2ab0
dont eat original error
LizardWizzard Aug 1, 2023
8d6ad0d
add timeline delete endpoint to openapi spec
LizardWizzard Aug 1, 2023
97289cd
handle errors injected by UnreliableWrapper when fetching deletion mark
LizardWizzard Aug 1, 2023
32cd3f5
add smoke test for tenant deletion
LizardWizzard Aug 2, 2023
224bdda
tests: add test with extensive failpoint usage, fix detected bugs
LizardWizzard Aug 7, 2023
6211d94
Merge branch 'main' into dkr/tenant-delete
LizardWizzard Aug 7, 2023
8daa6c3
tests: use cli to create tenant because config format is a bit differ…
LizardWizzard Aug 7, 2023
57ccd2c
tests: fix metrics assertions
LizardWizzard Aug 7, 2023
c3bad7d
remove debug code
LizardWizzard Aug 7, 2023
cf5bb7f
fix real s3 test to wait for tenant to become active with certain fai…
LizardWizzard Aug 7, 2023
6259353
ignore left-over tasks on shutdown
LizardWizzard Aug 7, 2023
8c48c9b
dedup tenant_id in spans
LizardWizzard Aug 9, 2023
af2e26f
rename assert_timelines_dir_empty -> ensure_timelines_dir_empty
LizardWizzard Aug 9, 2023
2380653
Update pageserver/src/tenant/delete.rs
LizardWizzard Aug 9, 2023
8e26a6d
make standalone timelines to be branches of main branch
LizardWizzard Aug 9, 2023
e2a0938
remove outdated comment
LizardWizzard Aug 9, 2023
01cef50
Update pageserver/src/tenant.rs
LizardWizzard Aug 10, 2023
84ba627
tweak poll iterations to reduce flakiness
LizardWizzard Aug 10, 2023
d7f2abf
dedup deletion polling routines to use wait_until
LizardWizzard Aug 10, 2023
1235d82
fix polling
LizardWizzard Aug 10, 2023
6bca75a
remove outdated comment
LizardWizzard Aug 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion libs/utils/src/fs_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ pub async fn is_directory_empty(path: impl AsRef<Path>) -> anyhow::Result<bool>
Ok(dir.next_entry().await?.is_none())
}

/// Returns the names of all entries found directly inside the directory at `path`.
///
/// Only the final path component of each entry is returned, converted with
/// `to_string_lossy`, so names that are not valid UTF-8 are returned in their
/// lossy form. Entries are returned in the order the directory stream yields
/// them; callers that need a stable order must sort the result themselves.
///
/// # Errors
///
/// Fails if the directory cannot be opened (the error is annotated with
/// `read_dir(<path>)`) or if advancing the directory stream fails.
pub async fn list_dir(path: impl AsRef<Path>) -> anyhow::Result<Vec<String>> {
    // `with_context` defers building the message to the failure path, so the
    // success path does not pay for formatting the path.
    let mut dir = tokio::fs::read_dir(&path)
        .await
        .with_context(|| format!("read_dir({})", path.as_ref().display()))?;

    let mut content = Vec::new();
    while let Some(entry) = dir.next_entry().await? {
        // `into_owned` reuses the allocation when the lossy conversion already
        // produced an owned string.
        content.push(entry.file_name().to_string_lossy().into_owned());
    }

    Ok(content)
}

pub fn ignore_not_found(e: io::Error) -> io::Result<()> {
if e.kind() == io::ErrorKind::NotFound {
Ok(())
Expand All @@ -43,7 +57,7 @@ where
mod test {
use std::path::PathBuf;

use crate::fs_ext::is_directory_empty;
use crate::fs_ext::{is_directory_empty, list_dir};

use super::ignore_absent_files;

Expand Down Expand Up @@ -109,4 +123,25 @@ mod test {

assert!(!file_path.exists());
}

#[tokio::test]
async fn list_dir_works() {
    let tmp = tempfile::tempdir().unwrap();
    let root = tmp.path();

    // A freshly created directory has no entries.
    let listing = list_dir(root).await.unwrap();
    assert!(listing.is_empty());

    // A single regular file is reported under its own name.
    let file_path: PathBuf = root.join("testfile");
    drop(std::fs::File::create(&file_path).unwrap());
    let listing = list_dir(root).await.unwrap();
    assert_eq!(&listing, &["testfile"]);

    // Sub-directories are listed alongside files. The listing order is not
    // fixed, so sort before comparing.
    let subdir_path: PathBuf = root.join("testdir");
    std::fs::create_dir(subdir_path).unwrap();

    let mut listing = list_dir(root).await.unwrap();
    listing.sort();
    let expected = &["testdir", "testfile"];
    assert_eq!(listing, expected);
}
}
2 changes: 1 addition & 1 deletion pageserver/src/bin/pageserver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ fn start_pageserver(
let order = pageserver::InitializationOrder {
initial_tenant_load: Some(init_done_tx),
initial_logical_size_can_start: init_done_rx.clone(),
initial_logical_size_attempt: init_logical_size_done_tx,
initial_logical_size_attempt: Some(init_logical_size_done_tx),
background_jobs_can_start: background_jobs_barrier.clone(),
};

Expand Down
9 changes: 8 additions & 1 deletion pageserver/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ use utils::{
use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
use crate::tenant::config::TenantConf;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
use crate::tenant::{
TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
};
use crate::{
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX,
TIMELINE_UNINIT_MARK_SUFFIX,
Expand Down Expand Up @@ -613,6 +615,11 @@ impl PageServerConf {
)
}

/// Builds the path of the tenant's "deleted" marker file: the tenant's own
/// directory joined with [`TENANT_DELETED_MARKER_FILE_NAME`].
pub fn tenant_deleted_mark_file_path(&self, tenant_id: &TenantId) -> PathBuf {
    let tenant_dir = self.tenant_path(tenant_id);
    tenant_dir.join(TENANT_DELETED_MARKER_FILE_NAME)
}

pub fn traces_path(&self) -> PathBuf {
self.workdir.join("traces")
}
Expand Down
42 changes: 42 additions & 0 deletions pageserver/src/http/openapi_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,47 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
delete:
description: |
Attempts to delete the specified tenant. 500 and 409 errors should be retried until 404 is returned.
404 means that the deletion has finished successfully.
jcsp marked this conversation as resolved.
Show resolved Hide resolved
responses:
"400":
description: Error when no tenant id found in path
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"404":
description: Tenant not found
content:
application/json:
schema:
$ref: "#/components/schemas/NotFoundError"
"409":
description: Deletion is already in progress, continue polling
content:
application/json:
schema:
$ref: "#/components/schemas/ConflictError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"

/v1/tenant/{tenant_id}/timeline:
parameters:
Expand Down Expand Up @@ -820,6 +861,7 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"

/v1/tenant/config:
put:
description: |
Expand Down
35 changes: 34 additions & 1 deletion pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
format!("Cannot delete timeline which has child timelines: {children:?}")
.into_boxed_str(),
),
a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
a @ AlreadyInProgress(_) => ApiError::Conflict(a.to_string()),
Other(e) => ApiError::InternalServerError(e),
}
}
Expand All @@ -208,6 +208,19 @@ impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
}
}

/// Maps tenant deletion failures onto HTTP API errors.
///
/// Nested errors (`Get`, `Timeline`) are delegated to their own `ApiError`
/// conversions; the remaining variants are mapped here directly.
impl From<crate::tenant::delete::DeleteTenantError> for ApiError {
    fn from(value: crate::tenant::delete::DeleteTenantError) -> Self {
        use crate::tenant::delete::DeleteTenantError as E;

        match value {
            // Wrapped errors already know how to become an `ApiError`.
            E::Get(inner) => ApiError::from(inner),
            E::Timeline(inner) => ApiError::from(inner),
            // A deletion that is already running is reported as a conflict.
            err @ E::AlreadyInProgress => ApiError::Conflict(err.to_string()),
            err @ E::InvalidState(_) => {
                ApiError::PreconditionFailed(err.to_string().into_boxed_str())
            }
            // Anything else is surfaced as an internal server error.
            E::Other(inner) => ApiError::InternalServerError(inner),
        }
    }
}

// Helper function to construct a TimelineInfo struct for a timeline
async fn build_timeline_info(
timeline: &Arc<Timeline>,
Expand Down Expand Up @@ -617,6 +630,23 @@ async fn tenant_status(
json_response(StatusCode::OK, tenant_info)
}

/// HTTP handler for `DELETE /v1/tenant/:tenant_id`.
///
/// Parses the tenant id from the request path, checks that the caller is
/// permitted to act on that tenant, and then awaits [`mgr::delete_tenant`].
/// On success it responds with `202 Accepted` and an empty JSON body; any
/// failure is converted into an [`ApiError`] via `?`.
async fn tenant_delete_handler(
    request: Request<Body>,
    _cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

    let state = get_state(&request);

    // Run the deletion inside a span carrying the tenant id, so log lines
    // emitted while it runs can be attributed to this request.
    let deletion = mgr::delete_tenant(state.conf, state.remote_storage.clone(), tenant_id);
    let span = info_span!("tenant_delete_handler", %tenant_id);
    deletion.instrument(span).await?;

    json_response(StatusCode::ACCEPTED, ())
}

/// HTTP endpoint to query the current tenant_size of a tenant.
///
/// This is not used by consumption metrics under [`crate::consumption_metrics`], but can be used
Expand Down Expand Up @@ -1345,6 +1375,9 @@ pub fn make_router(
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
.delete("/v1/tenant/:tenant_id", |r| {
api_handler(r, tenant_delete_handler)
})
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
api_handler(r, tenant_size_handler)
})
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ pub struct InitializationOrder {

/// Each timeline owns a clone of this to be consumed on the initial logical size calculation
/// attempt. It is important to drop this once the attempt has completed.
pub initial_logical_size_attempt: utils::completion::Completion,
pub initial_logical_size_attempt: Option<utils::completion::Completion>,

/// Barrier for when we can start any background jobs.
///
Expand Down
Loading