From 1568b1cf795686e4423ce3109d4aeeb406e9257e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oddbj=C3=B8rn=20Gr=C3=B8dem?= <29732646+oddgrd@users.noreply.github.com> Date: Fri, 29 Dec 2023 13:32:17 +0100 Subject: [PATCH] fix: tracing fixes, use consistent key names for project and service names (#1500) * fix(auth): use display impl for account name field * fix(deployer): add missing fields to proxy span, set status code on error * fix(gateway): always use display impl for project name fields * fix(auth,provisioner): 1.75 clippy * refactor(auth): use same naming convention for account name & tier fields * fix(deployer): use shuttle.service name, record error status * revert(auth): remove convery key instrument * fix(auth): skip duplicate fields * refactor(deployer,gateway): use shuttle.project.name for project names, shuttle.service.name for service names * fix(gateway): missed rename in task and proxy --- auth/src/api/handlers.rs | 8 ++++---- auth/src/error.rs | 2 +- deployer/src/deployment/run.rs | 4 ++-- deployer/src/handlers/mod.rs | 24 ++++++++++++------------ deployer/src/persistence/mod.rs | 2 +- deployer/src/proxy.rs | 20 ++++++++++++++++++-- gateway/src/api/latest.rs | 10 +++++----- gateway/src/project.rs | 4 ++-- gateway/src/proxy.rs | 4 ++-- gateway/src/service.rs | 2 +- gateway/src/task.rs | 4 ++-- provisioner/src/lib.rs | 2 +- 12 files changed, 51 insertions(+), 35 deletions(-) diff --git a/auth/src/api/handlers.rs b/auth/src/api/handlers.rs index 8fc5e75b9..3b55e91c3 100644 --- a/auth/src/api/handlers.rs +++ b/auth/src/api/handlers.rs @@ -18,7 +18,7 @@ use tracing::{error, instrument}; use super::{builder::KeyManagerState, RouterState, UserManagerState}; -#[instrument(skip(user_manager))] +#[instrument(skip_all, fields(account.name = %account_name))] pub(crate) async fn get_user( _: Admin, State(user_manager): State, @@ -29,7 +29,7 @@ pub(crate) async fn get_user( Ok(Json(user.into())) } -#[instrument(skip(user_manager))] +#[instrument(skip_all, fields(account.name = %account_name, account.tier = %account_tier))] pub(crate) async fn post_user( _: Admin, State(user_manager): State, @@ -40,7 +40,7 @@ pub(crate) async fn post_user( Ok(Json(user.into())) } -#[instrument(skip(user_manager))] +#[instrument(skip(user_manager, account_name, account_tier), fields(account.name = %account_name, account.tier = %account_tier))] pub(crate) async fn update_user_tier( _: Admin, State(user_manager): State, @@ -65,7 +65,7 @@ pub(crate) async fn update_user_tier( Ok(()) } -#[instrument(skip(claim, user_manager), fields(account_name = claim.sub, account_tier = %claim.tier))] +#[instrument(skip(claim, user_manager), fields(account.name = claim.sub, account.tier = %claim.tier))] pub(crate) async fn add_subscription_items( Extension(claim): Extension, State(user_manager): State, diff --git a/auth/src/error.rs b/auth/src/error.rs index 3b5752516..9e00ebfe8 100644 --- a/auth/src/error.rs +++ b/auth/src/error.rs @@ -26,7 +26,7 @@ pub enum Error { #[error("Incomplete checkout session.")] IncompleteCheckoutSession, #[error("Interacting with stripe resulted in error: {0}.")] - StripeError(#[from] StripeError), + Stripe(#[from] StripeError), #[error("Missing subscription ID from the checkout session.")] MissingSubscriptionId, } diff --git a/deployer/src/deployment/run.rs b/deployer/src/deployment/run.rs index 3cacad92e..d1b729a9d 100644 --- a/deployer/src/deployment/run.rs +++ b/deployer/src/deployment/run.rs @@ -382,10 +382,10 @@ async fn load( load_request.extensions_mut().insert(claim.clone()); - debug!(service_name = %service_name, "loading service"); + debug!(shuttle.service.name = %service_name, "loading service"); let response = runtime_client.load(load_request).await; - debug!(service_name = %service_name, "service loaded"); + debug!(shuttle.service.name = %service_name, "service loaded"); match response { Ok(response) => { let response = response.into_inner(); diff --git a/deployer/src/handlers/mod.rs b/deployer/src/handlers/mod.rs index e22d67ea6..5f5d29b18 100644 --- a/deployer/src/handlers/mod.rs +++ b/deployer/src/handlers/mod.rs @@ -245,7 +245,7 @@ pub async fn get_services( Ok(Json(services)) } -#[instrument(skip_all, fields(%project_name, %service_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, shuttle.service.name = %service_name))] #[utoipa::path( get, path = "/projects/{project_name}/services/{service_name}", @@ -282,7 +282,7 @@ pub async fn get_service( } } -#[instrument(skip_all, fields(%project_name, %service_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, shuttle.service.name = %service_name))] #[utoipa::path( get, path = "/projects/{project_name}/services/{service_name}/resources", @@ -326,7 +326,7 @@ pub async fn get_service_resources( } } -#[instrument(skip_all, fields(%project_name, %service_name, %resource_type))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, shuttle.service.name = %service_name, %resource_type))] #[utoipa::path( delete, path = "/projects/{project_name}/services/{service_name}/resources/{resource_type}", @@ -383,7 +383,7 @@ pub async fn delete_service_resource( Ok(Json(())) } -#[instrument(skip_all, fields(%project_name, %service_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, shuttle.service.name = %service_name))] #[utoipa::path( post, path = "/projects/{project_name}/services/{service_name}", @@ -462,7 +462,7 @@ pub async fn create_service( Ok(Json(deployment.into())) } -#[instrument(skip_all, fields(%project_name, %service_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, shuttle.service.name = %service_name))] #[utoipa::path( delete, path = "/projects/{project_name}/services/{service_name}", @@ -500,7 +500,7 @@ pub async fn stop_service( Ok(Json(response)) } -#[instrument(skip_all, fields(%project_name, page, limit))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, page, limit))] #[utoipa::path( get, path = "/projects/{project_name}/deployments", @@ -535,7 +535,7 @@ pub async fn get_deployments( } } -#[instrument(skip_all, fields(%project_name, %deployment_id))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %deployment_id))] #[utoipa::path( get, path = "/projects/{project_name}/deployments/{deployment_id}", @@ -560,7 +560,7 @@ pub async fn get_deployment( } } -#[instrument(skip_all, fields(%project_name, %deployment_id))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %deployment_id))] #[utoipa::path( delete, path = "/projects/{project_name}/deployments/{deployment_id}", @@ -588,7 +588,7 @@ pub async fn delete_deployment( } } -#[instrument(skip_all, fields(%project_name, %deployment_id))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %deployment_id))] #[utoipa::path( put, path = "/projects/{project_name}/deployments/{deployment_id}", @@ -628,7 +628,7 @@ pub async fn start_deployment( } } -#[instrument(skip_all, fields(%project_name, %deployment_id))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %deployment_id))] #[utoipa::path( get, path = "/projects/{project_name}/deployments/{deployment_id}/logs", @@ -671,7 +671,7 @@ pub async fn get_logs( } // don't instrument id to prevent it from showing up in deployment log -#[instrument(skip_all, fields(%project_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name))] #[utoipa::path( get, path = "/projects/{project_name}/ws/deployments/{deployment_id}/logs", @@ -765,7 +765,7 @@ async fn logs_websocket_handler( let _ = s.close().await; } -#[instrument(skip_all, fields(%project_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name))] #[utoipa::path( post, path = "/projects/{project_name}/clean", diff --git a/deployer/src/persistence/mod.rs b/deployer/src/persistence/mod.rs index 7b8aeb466..08dd70aeb 100644 --- a/deployer/src/persistence/mod.rs +++ b/deployer/src/persistence/mod.rs @@ -581,7 +581,7 @@ impl ResourceManager for Persistence { #[async_trait::async_trait] impl AddressGetter for Persistence { - #[instrument(skip(self))] + #[instrument(skip_all, fields(shuttle.service.name = service_name))] async fn get_address_for_service( &self, service_name: &str, diff --git a/deployer/src/proxy.rs b/deployer/src/proxy.rs index 291532be8..af09f1f70 100644 --- a/deployer/src/proxy.rs +++ b/deployer/src/proxy.rs @@ -23,7 +23,7 @@ static PROXY_CLIENT: Lazy>> = Lazy::new(|| ReverseProxy::new(Client::new())); static SERVER_HEADER: Lazy = Lazy::new(|| "shuttle.rs".parse().unwrap()); -#[instrument(name = "proxy_request", skip_all, fields(http.method = %req.method(), http.uri = %req.uri(), http.status_code = field::Empty, service = field::Empty))] +#[instrument(name = "proxy_request", skip_all, fields(http.method = %req.method(), http.uri = %req.uri(), http.status_code = field::Empty, http.host = field::Empty, shuttle.service.name = field::Empty, proxy.status_code = field::Empty))] pub async fn handle( remote_address: SocketAddr, fqdn: FQDN, @@ -45,6 +45,7 @@ pub async fn handle( .to_owned(), None => { trace!("proxy request has no host header"); + Span::current().record("proxy.status_code", StatusCode::BAD_REQUEST.as_u16()); return Ok(Response::builder() .status(StatusCode::BAD_REQUEST) .body(Body::empty()) @@ -55,6 +56,7 @@ pub async fn handle( if host != fqdn { trace!(?host, "proxy won't serve foreign domain"); + Span::current().record("proxy.status_code", StatusCode::BAD_REQUEST.as_u16()); return Ok(Response::builder() .status(StatusCode::BAD_REQUEST) .body(Body::from("this domain is not served by proxy")) @@ -67,6 +69,7 @@ pub async fn handle( Some(project) => project.0, None => { trace!("proxy request has no X-Shuttle-Project header"); + Span::current().record("proxy.status_code", StatusCode::BAD_REQUEST.as_u16()); return Ok(Response::builder() .status(StatusCode::BAD_REQUEST) .body(Body::from("request has no X-Shuttle-Project header")) @@ -75,12 +78,14 @@ pub async fn handle( }; // Record current service for tracing purposes - span.record("shuttle.service", &service); + span.record("shuttle.service.name", &service); let proxy_address = match address_getter.get_address_for_service(&service).await { Ok(Some(address)) => address, Ok(None) => { trace!(?host, service, "service not found on this server"); + Span::current().record("proxy.status_code", StatusCode::NOT_FOUND.as_u16()); + let response_body = format!("could not find service: {}", service); return Ok(Response::builder() .status(StatusCode::NOT_FOUND) @@ -89,6 +94,10 @@ pub async fn handle( } Err(err) => { error!(error = %err, service, "proxy failed to find address for host"); + Span::current().record( + "proxy.status_code", + StatusCode::INTERNAL_SERVER_ERROR.as_u16(), + ); let response_body = format!("failed to find service for host: {}", host); return Ok(Response::builder() @@ -101,6 +110,7 @@ pub async fn handle( match reverse_proxy(remote_address.ip(), &proxy_address.to_string(), req).await { Ok(response) => { Span::current().record("http.status_code", response.status().as_u16()); + Span::current().record("proxy.status_code", StatusCode::OK.as_u16()); Ok(response) } Err(error) => { @@ -118,6 +128,12 @@ pub async fn handle( "error while handling request needing upgrade in reverse proxy" ), }; + + Span::current().record( + "proxy.status_code", + StatusCode::INTERNAL_SERVER_ERROR.as_u16(), + ); + Ok(Response::builder() .status(StatusCode::INTERNAL_SERVER_ERROR) .body(Body::empty()) diff --git a/gateway/src/api/latest.rs b/gateway/src/api/latest.rs index 92567165b..7c26073cd 100644 --- a/gateway/src/api/latest.rs +++ b/gateway/src/api/latest.rs @@ -189,7 +189,7 @@ async fn get_projects_list( Ok(AxumJson(projects)) } -#[instrument(skip_all, fields(%project_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name))] #[utoipa::path( post, path = "/projects/{project_name}", @@ -256,7 +256,7 @@ async fn create_project( Ok(AxumJson(response)) } -#[instrument(skip_all, fields(%project_name))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name))] #[utoipa::path( delete, path = "/projects/{project_name}", @@ -312,7 +312,7 @@ struct DeleteProjectParams { dry_run: Option, } -#[instrument(skip_all, fields(project_name = %scoped_user.scope))] +#[instrument(skip_all, fields(shuttle.project.name = %scoped_user.scope))] #[utoipa::path( delete, path = "/projects/{project_name}/delete", @@ -693,7 +693,7 @@ async fn create_acme_account( Ok(AxumJson(res)) } -#[instrument(skip_all, fields(%project_name, %fqdn))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %fqdn))] #[utoipa::path( post, path = "/admin/acme/request/{project_name}/{fqdn}", @@ -772,7 +772,7 @@ async fn request_custom_domain_acme_certificate( )) } -#[instrument(skip_all, fields(%project_name, %fqdn))] +#[instrument(skip_all, fields(shuttle.project.name = %project_name, %fqdn))] #[utoipa::path( post, path = "/admin/acme/renew/{project_name}/{fqdn}", diff --git a/gateway/src/project.rs b/gateway/src/project.rs index 7ecf4dfe1..8e79a8fcd 100644 --- a/gateway/src/project.rs +++ b/gateway/src/project.rs @@ -1394,7 +1394,7 @@ where debug!( shuttle.container.id = container.id, - shuttle.service = service.name.to_string(), + shuttle.service.name = %service.name, "{} has {} CPU usage per minute", service.name, cpu_per_minute @@ -1491,7 +1491,7 @@ impl Service { .map_err(|err| err.into()) } - #[instrument(name = "calling status endpoint on container", skip_all, fields(project_name = %self.name))] + #[instrument(name = "calling status endpoint on container", skip_all, fields(shuttle.project.name = %self.name))] pub async fn is_healthy(&mut self) -> bool { let uri = self.uri(format!("/projects/{}/status", self.name)).unwrap(); let resp = timeout(IS_HEALTHY_TIMEOUT, CLIENT.get(uri)).await; diff --git a/gateway/src/proxy.rs b/gateway/src/proxy.rs index 37dc508f1..95c5cc0a5 100644 --- a/gateway/src/proxy.rs +++ b/gateway/src/proxy.rs @@ -104,7 +104,7 @@ impl UserProxy { task_sender: Sender, mut req: Request, ) -> Result { - let span = debug_span!("proxy", http.method = %req.method(), http.host = ?req.headers().get("Host"), http.uri = %req.uri(), http.status_code = field::Empty, project = field::Empty); + let span = debug_span!("proxy", http.method = %req.method(), http.host = ?req.headers().get("Host"), http.uri = %req.uri(), http.status_code = field::Empty, shuttle.project.name = field::Empty); trace!(?req, "serving proxy request"); let fqdn = req @@ -139,7 +139,7 @@ impl UserProxy { .await?; // Record current project for tracing purposes - span.record("project", &project_name.to_string()); + span.record("shuttle.project.name", &project_name.to_string()); let target_ip = project .state diff --git a/gateway/src/service.rs b/gateway/src/service.rs index abffaa6d4..c15fe92ec 100644 --- a/gateway/src/service.rs +++ b/gateway/src/service.rs @@ -964,7 +964,7 @@ impl GatewayService { // Start the project if it is idle if project.state.is_stopped() { - trace!(%project_name, "starting up idle project"); + trace!(shuttle.project.name = %project_name, "starting up idle project"); let handle = self .new_task() diff --git a/gateway/src/task.rs b/gateway/src/task.rs index a73fd0acd..da67439f0 100644 --- a/gateway/src/task.rs +++ b/gateway/src/task.rs @@ -491,7 +491,7 @@ impl Task<()> for ProjectTask { let span = info_span!( "polling project", - ctx.project = ?project_ctx.project_name.to_string(), + shuttle.project.name = %project_ctx.project_name, ctx.state = project_ctx.state.state(), ctx.state_after = field::Empty ); @@ -506,7 +506,7 @@ impl Task<()> for ProjectTask { res = &mut poll => res, _ = timeout => { warn!( - project_name = ?self.project_name, + shuttle.project.name = %self.project_name, "a task has been idling for a long time" ); poll.await diff --git a/provisioner/src/lib.rs b/provisioner/src/lib.rs index e930221b5..9ebb0b9b4 100644 --- a/provisioner/src/lib.rs +++ b/provisioner/src/lib.rs @@ -609,7 +609,7 @@ async fn wait_for_instance( .await? .db_instances .expect("aws to return instances") - .get(0) + .first() .expect("to find the instance just created or modified") .clone();