Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add deep health check #3210

Merged
merged 18 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
cec252e
feat: add deep health check
Chethan-rao Dec 27, 2023
ef5d8b3
chore: run formatter
hyperswitch-bot[bot] Dec 27, 2023
2e1d559
update response type to json and impl health check interface for kafk…
Chethan-rao Dec 28, 2023
28c7d92
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
7e75ed4
chore: run formatter
hyperswitch-bot[bot] Dec 28, 2023
1a6943d
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
b2edae1
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
5040182
return 5xx in case at least one component health is down
Chethan-rao Dec 29, 2023
605c65b
change type names
Chethan-rao Dec 29, 2023
ab5110b
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 2, 2024
0cdfb9d
remove key custodian status and refactor transaction code
Chethan-rao Jan 2, 2024
e3cf92c
chore: run formatter
hyperswitch-bot[bot] Jan 2, 2024
9bc8ce6
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 3, 2024
324279b
chore: run formatter
hyperswitch-bot[bot] Jan 3, 2024
e284c80
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 4, 2024
ba0c55c
update locker health check impl
Chethan-rao Jan 4, 2024
5fcc74e
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Jan 4, 2024
51d9e1f
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Jan 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions crates/api_models/src/health_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/// Body of the deep health check response: one entry per component the router probes.
///
/// `deep_health_check` serializes this struct to JSON.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RouterHealthCheckResponse {
/// Database probe result: `"Health is good"` on success, otherwise the probe error rendered
/// as text.
pub database: String,
/// Redis probe result: `"Health is good"` on success, otherwise the probe error rendered as
/// text.
pub redis: String,
/// Locker probe result, including the key custodian state.
pub locker: LockerHealthResponse,
}

/// Locker section of [`RouterHealthCheckResponse`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LockerHealthResponse {
/// `"Health is good"` when the locker probe returned `Ok`, otherwise the probe error rendered
/// as text.
pub status: String,
/// Key custodian state derived from the locker probe result.
pub key_custodian_status: KeyCustodianStatus,
}

/// State of the locker's key custodian as reported by the deep health check.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub enum KeyCustodianStatus {
/// The locker probe itself failed, so the state could not be determined.
Unavailable,
/// The locker probe returned status code 403.
Locked,
/// The locker probe returned any status code other than 403.
Unlocked,
}
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions crates/api_models/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub mod errors;
pub mod events;
pub mod files;
pub mod gsm;
pub mod health_check;
pub mod locker_migration;
pub mod mandates;
pub mod organization;
Expand Down
2 changes: 2 additions & 0 deletions crates/router/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod events;
pub mod file;
pub mod fraud_check;
pub mod gsm;
pub mod health_check;
mod kafka_store;
pub mod locker_mock_up;
pub mod mandate;
Expand Down Expand Up @@ -103,6 +104,7 @@ pub trait StorageInterface:
+ user_role::UserRoleInterface
+ authorization::AuthorizationInterface
+ user::sample_data::BatchSampleDataInterface
+ health_check::HealthCheckInterface
+ 'static
{
fn get_scheduler_db(&self) -> Box<dyn scheduler::SchedulerInterface>;
Expand Down
157 changes: 157 additions & 0 deletions crates/router/src/db/health_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl};
use diesel_models::ConfigNew;
use error_stack::ResultExt;
use router_env::logger;

use super::{MockDb, StorageInterface, Store};
use crate::{
connection,
core::errors::{self, CustomResult},
routes,
services::api as services,
};

/// Health probes for the components the router depends on: database, Redis and locker.
///
/// This file implements it for `Store` (real probes) and `MockDb` (always reports success).
#[async_trait::async_trait]
pub trait HealthCheckInterface {
/// Probes the database.
///
/// `db` is the storage handle the implementation may use to issue its probe queries.
/// Returns `Ok(())` when the probe succeeded, otherwise a `HealthCheckDBError` report.
async fn health_check_db(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckDBError>;
/// Probes Redis.
///
/// `db` is the storage handle from which the Redis connection is obtained.
/// Returns `Ok(())` when the probe succeeded, otherwise a `HealthCheckRedisError` report.
async fn health_check_redis(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError>;
/// Probes the locker configured in `state`.
///
/// Returns the status code of the locker's response. The implementations in this file
/// return `0` when no locker call is made (mock locker enabled, or `MockDb`).
/// Fails with a `HealthCheckLockerError` report when the locker could not be called.
async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<u16, errors::HealthCheckLockerError>;
}

#[async_trait::async_trait]
impl HealthCheckInterface for Store {
async fn health_check_db(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckDBError> {
let conn = connection::pg_connection_write(self)
.await
.change_context(errors::HealthCheckDBError::DBError)?;

let _data = conn
.transaction_async(|conn| {
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
Box::pin(async move {
let query =
diesel::select(diesel::dsl::sql::<diesel::sql_types::Integer>("1 + 1"));
let _x: i32 = query.get_result_async(&conn).await.map_err(|err| {
logger::error!(read_err=?err,"Error while reading element in the database");
errors::HealthCheckDBError::DBReadError
})?;

logger::debug!("Database read was successful");

db.insert_config(ConfigNew {
key: "test_key".to_string(),
config: "test_value".to_string(),
})
.await
.map_err(|err| {
logger::error!(write_err=?err,"Error while writing to database");
errors::HealthCheckDBError::DBWriteError
})?;

logger::debug!("Database write was successful");

db.delete_config_by_key("test_key").await.map_err(|err| {
logger::error!(delete_err=?err,"Error while deleting element in the database");
errors::HealthCheckDBError::DBDeleteError
})?;

logger::debug!("Database delete was successful");

Ok::<_, errors::HealthCheckDBError>(())
})
})
.await?;

Ok(())
}

/// Probes Redis with a set, a get and a delete of a throwaway key.
///
/// The key `test_key` is written with an expiry argument of 30, read back and then deleted.
/// Failure to obtain the connection maps to `RedisConnectionError`; a failing step maps to
/// `SetFailed`, `GetFailed` or `DeleteFailed` respectively.
async fn health_check_redis(
    &self,
    db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError> {
    const PROBE_KEY: &str = "test_key";
    const PROBE_VALUE: &str = "test_value";

    let redis = db
        .get_redis_conn()
        .change_context(errors::HealthCheckRedisError::RedisConnectionError)?;

    redis
        .serialize_and_set_key_with_expiry(PROBE_KEY, PROBE_VALUE, 30)
        .await
        .change_context(errors::HealthCheckRedisError::SetFailed)?;
    logger::debug!("Redis set_key was successful");

    // Only the success of the read is checked; the returned value is discarded.
    redis
        .get_key(PROBE_KEY)
        .await
        .change_context(errors::HealthCheckRedisError::GetFailed)?;
    logger::debug!("Redis get_key was successful");

    redis
        .delete_key(PROBE_KEY)
        .await
        .change_context(errors::HealthCheckRedisError::DeleteFailed)?;
    logger::debug!("Redis delete_key was successful");

    Ok(())
}

async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<u16, errors::HealthCheckLockerError> {
let locker = &state.conf.locker;
let mut status_code = 0;
if !locker.mock_locker {
let mut url = locker.host_rs.to_owned();
url.push_str("/health");
let request = services::Request::new(services::Method::Get, &url);
status_code = services::call_connector_api(state, request)
.await
.change_context(errors::HealthCheckLockerError::FailedToCallLocker)?
.map(|resp| resp.status_code)
.map_err(|err| err.status_code)
.unwrap_or_else(|code| code);
}
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved

logger::debug!("Locker call was successful");

Ok(status_code)
}
}

// Mock implementation: performs no probing and reports every component as healthy.
#[async_trait::async_trait]
impl HealthCheckInterface for MockDb {
    /// Always succeeds; the storage handle is ignored.
    async fn health_check_db(
        &self,
        _db: &dyn StorageInterface,
    ) -> CustomResult<(), errors::HealthCheckDBError> {
        Ok(())
    }

    /// Always succeeds; the storage handle is ignored.
    async fn health_check_redis(
        &self,
        _db: &dyn StorageInterface,
    ) -> CustomResult<(), errors::HealthCheckRedisError> {
        Ok(())
    }

    /// Always succeeds with status code `0`; no locker call is made.
    async fn health_check_locker(
        &self,
        _state: &routes::AppState,
    ) -> CustomResult<u16, errors::HealthCheckLockerError> {
        Ok(0)
    }
}
26 changes: 26 additions & 0 deletions crates/router/src/db/kafka_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ use crate::{
events::EventInterface,
file::FileMetadataInterface,
gsm::GsmInterface,
health_check::HealthCheckInterface,
locker_mock_up::LockerMockUpInterface,
mandate::MandateInterface,
merchant_account::MerchantAccountInterface,
Expand All @@ -57,6 +58,7 @@ use crate::{
routing_algorithm::RoutingAlgorithmInterface,
MasterKeyInterface, StorageInterface,
},
routes,
services::{authentication, kafka::KafkaProducer, Store},
types::{
domain,
Expand Down Expand Up @@ -2131,3 +2133,27 @@ impl AuthorizationInterface for KafkaStore {
.await
}
}

#[async_trait::async_trait]
impl HealthCheckInterface for KafkaStore {
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
async fn health_check_db(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckDBError> {
self.diesel_store.health_check_db(db).await
}

async fn health_check_redis(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError> {
self.diesel_store.health_check_redis(db).await
}

async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<u16, errors::HealthCheckLockerError> {
self.diesel_store.health_check_locker(state).await
}
}
5 changes: 3 additions & 2 deletions crates/router/src/routes/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,10 @@ pub struct Health;

impl Health {
pub fn server(state: AppState) -> Scope {
web::scope("")
web::scope("health")
.app_data(web::Data::new(state))
.service(web::resource("/health").route(web::get().to(health)))
.service(web::resource("").route(web::get().to(health)))
.service(web::resource("/deep_check").route(web::post().to(deep_health_check)))
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
75 changes: 73 additions & 2 deletions crates/router/src/routes/health.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use actix_web::web;
use api_models::health_check::{
KeyCustodianStatus, LockerHealthResponse, RouterHealthCheckResponse,
};
use router_env::{instrument, logger, tracing};

use crate::routes::metrics;

use super::app;
use crate::{routes::metrics, services};
/// .
// #[logger::instrument(skip_all, name = "name1", level = "warn", fields( key1 = "val1" ))]
#[instrument(skip_all)]
Expand All @@ -11,3 +15,70 @@ pub async fn health() -> impl actix_web::Responder {
logger::info!("Health was called");
actix_web::HttpResponse::Ok().body("health is good")
}

#[instrument(skip_all)]
pub async fn deep_health_check(state: web::Data<app::AppState>) -> impl actix_web::Responder {
metrics::HEALTH_METRIC.add(&metrics::CONTEXT, 1, &[]);
let db = &*state.store;
let mut status_code = 200;
logger::info!("Deep health check was called");

logger::debug!("Database health check begin");

let db_status = match db.health_check_db(db).await {
Ok(_) => "Health is good".to_string(),
Err(err) => {
status_code = 500;
err.to_string()
}
};
logger::debug!("Database health check end");

logger::debug!("Redis health check begin");

let redis_status = match db.health_check_redis(db).await {
Ok(_) => "Health is good".to_string(),
Err(err) => {
status_code = 500;
err.to_string()
}
};

logger::debug!("Redis health check end");

logger::debug!("Locker health check begin");

let (locker_status, key_custodian_status) = match db.health_check_locker(&state).await {
Ok(status_code) => {
let status_message = "Health is good".to_string();
let key_custodian_status = if status_code == 403 {
KeyCustodianStatus::Locked
} else {
KeyCustodianStatus::Unlocked
};
(status_message, key_custodian_status)
}
Err(err) => {
status_code = 500;
(err.to_string(), KeyCustodianStatus::Unavailable)
}
};

logger::debug!("Locker health check end");

let response = serde_json::to_string(&RouterHealthCheckResponse {
database: db_status,
redis: redis_status,
locker: LockerHealthResponse {
status: locker_status,
key_custodian_status,
},
})
.unwrap_or_default();

if status_code == 200 {
services::http_response_json(response)
} else {
services::http_server_error_json_response(response)
}
}
8 changes: 8 additions & 0 deletions crates/router/src/services/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,14 @@ pub fn http_response_json<T: body::MessageBody + 'static>(response: T) -> HttpRe
.body(response)
}

/// Wraps `response` in an HTTP 500 (Internal Server Error) reply whose content type is
/// `application/json`.
pub fn http_server_error_json_response<T: body::MessageBody + 'static>(
    response: T,
) -> HttpResponse {
    let mut builder = HttpResponse::InternalServerError();
    builder
        .content_type(mime::APPLICATION_JSON)
        .body(response)
}

pub fn http_response_json_with_headers<T: body::MessageBody + 'static>(
response: T,
mut headers: Vec<(String, String)>,
Expand Down
Loading
Loading