Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add deep health check #3210

Merged
merged 18 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
cec252e
feat: add deep health check
Chethan-rao Dec 27, 2023
ef5d8b3
chore: run formatter
hyperswitch-bot[bot] Dec 27, 2023
2e1d559
update response type to json and impl health check interface for kafk…
Chethan-rao Dec 28, 2023
28c7d92
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
7e75ed4
chore: run formatter
hyperswitch-bot[bot] Dec 28, 2023
1a6943d
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
b2edae1
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Dec 28, 2023
5040182
return 5xx in case at least one component health is down
Chethan-rao Dec 29, 2023
605c65b
change type names
Chethan-rao Dec 29, 2023
ab5110b
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 2, 2024
0cdfb9d
remove key custodian status and refactor transaction code
Chethan-rao Jan 2, 2024
e3cf92c
chore: run formatter
hyperswitch-bot[bot] Jan 2, 2024
9bc8ce6
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 3, 2024
324279b
chore: run formatter
hyperswitch-bot[bot] Jan 3, 2024
e284c80
Merge branch 'main' of github.com:juspay/hyperswitch into health_checks
Chethan-rao Jan 4, 2024
ba0c55c
update locker health check impl
Chethan-rao Jan 4, 2024
5fcc74e
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Jan 4, 2024
51d9e1f
Merge branch 'health_checks' of github.com:juspay/hyperswitch into he…
Chethan-rao Jan 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/api_models/src/health_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// Body of the deep health check response, serialized to JSON by the router.
///
/// Each field holds a human-readable status string for one component: the
/// deep health check handler fills it with `"Health is good"` on success or
/// with the textual form of the component's error on failure.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct RouterHealthCheckResponse {
/// Status text for the database check.
pub database: String,
/// Status text for the Redis check.
pub redis: String,
/// Status text for the locker check.
pub locker: String,
}
1 change: 1 addition & 0 deletions crates/api_models/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub mod errors;
pub mod events;
pub mod files;
pub mod gsm;
pub mod health_check;
pub mod locker_migration;
pub mod mandates;
pub mod organization;
Expand Down
2 changes: 2 additions & 0 deletions crates/router/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,5 @@ pub const EMAIL_TOKEN_TIME_IN_SECS: u64 = 60 * 60 * 24; // 1 day
pub const VERIFY_CONNECTOR_ID_PREFIX: &str = "conn_verify";
#[cfg(feature = "olap")]
pub const VERIFY_CONNECTOR_MERCHANT_ID: &str = "test_merchant";

/// Path appended to the locker host URL to build the locker health check URL.
pub const LOCKER_HEALTH_CALL_PATH: &str = "/health";
2 changes: 2 additions & 0 deletions crates/router/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod events;
pub mod file;
pub mod fraud_check;
pub mod gsm;
pub mod health_check;
mod kafka_store;
pub mod locker_mock_up;
pub mod mandate;
Expand Down Expand Up @@ -103,6 +104,7 @@ pub trait StorageInterface:
+ user_role::UserRoleInterface
+ authorization::AuthorizationInterface
+ user::sample_data::BatchSampleDataInterface
+ health_check::HealthCheckInterface
+ 'static
{
fn get_scheduler_db(&self) -> Box<dyn scheduler::SchedulerInterface>;
Expand Down
147 changes: 147 additions & 0 deletions crates/router/src/db/health_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl};
use diesel_models::ConfigNew;
use error_stack::ResultExt;
use router_env::logger;

use super::{MockDb, StorageInterface, Store};
use crate::{
connection,
consts::LOCKER_HEALTH_CALL_PATH,
core::errors::{self, CustomResult},
routes,
services::api as services,
types::storage,
};

/// Probes for the external components the router depends on.
///
/// Every method resolves to `Ok(())` when the probed component behaved as
/// expected, and to a component-specific error otherwise.
#[async_trait::async_trait]
pub trait HealthCheckInterface {
/// Probes the database.
async fn health_check_db(&self) -> CustomResult<(), errors::HealthCheckDBError>;
/// Probes Redis; `db` is the storage interface handed to the implementation
/// for obtaining the Redis connection.
async fn health_check_redis(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError>;
/// Probes the locker; `state` supplies the application state (configuration
/// and whatever else the implementation needs to place the call).
async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<(), errors::HealthCheckLockerError>;
}

#[async_trait::async_trait]
impl HealthCheckInterface for Store {
async fn health_check_db(&self) -> CustomResult<(), errors::HealthCheckDBError> {
let conn = connection::pg_connection_write(self)
.await
.change_context(errors::HealthCheckDBError::DBError)?;

let _data = conn
.transaction_async(|conn| {
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
Box::pin(async move {
let query =
diesel::select(diesel::dsl::sql::<diesel::sql_types::Integer>("1 + 1"));
let _x: i32 = query.get_result_async(&conn).await.map_err(|err| {
logger::error!(read_err=?err,"Error while reading element in the database");
errors::HealthCheckDBError::DBReadError
})?;

logger::debug!("Database read was successful");

let config = ConfigNew {
key: "test_key".to_string(),
config: "test_value".to_string(),
};

config.insert(&conn).await.map_err(|err| {
logger::error!(write_err=?err,"Error while writing to database");
errors::HealthCheckDBError::DBWriteError
})?;

logger::debug!("Database write was successful");

storage::Config::delete_by_key(&conn, "test_key").await.map_err(|err| {
logger::error!(delete_err=?err,"Error while deleting element in the database");
errors::HealthCheckDBError::DBDeleteError
})?;

logger::debug!("Database delete was successful");

Ok::<_, errors::HealthCheckDBError>(())
})
})
.await?;

Ok(())
}

/// Probes Redis with a set, a get and a delete of a throwaway key.
///
/// The connection is taken from `db`; the key is written with an expiry so
/// that it does not linger if a later step fails.
///
/// # Errors
///
/// * `RedisConnectionError` when no Redis connection can be obtained.
/// * `SetFailed`, `GetFailed` or `DeleteFailed` when the matching command fails.
async fn health_check_redis(
    &self,
    db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError> {
    // Throwaway entry used by all three probes.
    const PROBE_KEY: &str = "test_key";
    const PROBE_VALUE: &str = "test_value";

    let redis_conn = db
        .get_redis_conn()
        .change_context(errors::HealthCheckRedisError::RedisConnectionError)?;

    // Set probe (the last argument is the expiry passed to the Redis client).
    redis_conn
        .serialize_and_set_key_with_expiry(PROBE_KEY, PROBE_VALUE, 30)
        .await
        .change_context(errors::HealthCheckRedisError::SetFailed)?;
    logger::debug!("Redis set_key was successful");

    // Get probe.
    redis_conn
        .get_key(PROBE_KEY)
        .await
        .change_context(errors::HealthCheckRedisError::GetFailed)?;
    logger::debug!("Redis get_key was successful");

    // Delete probe.
    redis_conn
        .delete_key(PROBE_KEY)
        .await
        .change_context(errors::HealthCheckRedisError::DeleteFailed)?;
    logger::debug!("Redis delete_key was successful");

    Ok(())
}

async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<(), errors::HealthCheckLockerError> {
let locker = &state.conf.locker;
if !locker.mock_locker {
let mut url = locker.host_rs.to_owned();
url.push_str(LOCKER_HEALTH_CALL_PATH);
let request = services::Request::new(services::Method::Get, &url);
services::call_connector_api(state, request)
.await
.change_context(errors::HealthCheckLockerError::FailedToCallLocker)?
.ok();
}

logger::debug!("Locker call was successful");

Ok(())
}
}

/// Health checks for the in-memory mock store: every probe reports success
/// immediately and ignores its arguments.
#[async_trait::async_trait]
impl HealthCheckInterface for MockDb {
// Always succeeds.
async fn health_check_db(&self) -> CustomResult<(), errors::HealthCheckDBError> {
Ok(())
}

// Always succeeds; the storage interface argument is unused.
async fn health_check_redis(
&self,
_: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError> {
Ok(())
}

// Always succeeds; the application state argument is unused.
async fn health_check_locker(
&self,
_: &routes::AppState,
) -> CustomResult<(), errors::HealthCheckLockerError> {
Ok(())
}
}
23 changes: 23 additions & 0 deletions crates/router/src/db/kafka_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ use crate::{
events::EventInterface,
file::FileMetadataInterface,
gsm::GsmInterface,
health_check::HealthCheckInterface,
locker_mock_up::LockerMockUpInterface,
mandate::MandateInterface,
merchant_account::MerchantAccountInterface,
Expand All @@ -57,6 +58,7 @@ use crate::{
routing_algorithm::RoutingAlgorithmInterface,
MasterKeyInterface, StorageInterface,
},
routes,
services::{authentication, kafka::KafkaProducer, Store},
types::{
domain,
Expand Down Expand Up @@ -2131,3 +2133,24 @@ impl AuthorizationInterface for KafkaStore {
.await
}
}

#[async_trait::async_trait]
impl HealthCheckInterface for KafkaStore {
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
// Forwards the database probe to the wrapped `diesel_store`.
async fn health_check_db(&self) -> CustomResult<(), errors::HealthCheckDBError> {
self.diesel_store.health_check_db().await
}

// Forwards the Redis probe (and the `db` argument) to the wrapped `diesel_store`.
async fn health_check_redis(
&self,
db: &dyn StorageInterface,
) -> CustomResult<(), errors::HealthCheckRedisError> {
self.diesel_store.health_check_redis(db).await
}

// Forwards the locker probe (and the `state` argument) to the wrapped `diesel_store`.
async fn health_check_locker(
&self,
state: &routes::AppState,
) -> CustomResult<(), errors::HealthCheckLockerError> {
self.diesel_store.health_check_locker(state).await
}
}
5 changes: 3 additions & 2 deletions crates/router/src/routes/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,10 @@ pub struct Health;

impl Health {
pub fn server(state: AppState) -> Scope {
web::scope("")
web::scope("health")
.app_data(web::Data::new(state))
.service(web::resource("/health").route(web::get().to(health)))
.service(web::resource("").route(web::get().to(health)))
.service(web::resource("/deep_check").route(web::post().to(deep_health_check)))
Chethan-rao marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
62 changes: 60 additions & 2 deletions crates/router/src/routes/health.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use actix_web::web;
use api_models::health_check::RouterHealthCheckResponse;
use router_env::{instrument, logger, tracing};

use crate::routes::metrics;

use super::app;
use crate::{routes::metrics, services};
/// .
// #[logger::instrument(skip_all, name = "name1", level = "warn", fields( key1 = "val1" ))]
#[instrument(skip_all)]
Expand All @@ -11,3 +13,59 @@ pub async fn health() -> impl actix_web::Responder {
logger::info!("Health was called");
actix_web::HttpResponse::Ok().body("health is good")
}

/// Handler for the deep health check.
///
/// Runs the database, Redis and locker probes one after another and returns a
/// JSON [`RouterHealthCheckResponse`] with one status string per component.
/// The response is sent through `http_response_json` when every probe
/// succeeded and through `http_server_error_json_response` as soon as at
/// least one of them failed.
#[instrument(skip_all)]
pub async fn deep_health_check(state: web::Data<app::AppState>) -> impl actix_web::Responder {
    /// Converts a probe outcome into its status text and records a failure in
    /// `all_healthy`.
    fn describe<E: std::fmt::Display>(outcome: Result<(), E>, all_healthy: &mut bool) -> String {
        match outcome {
            Ok(_) => "Health is good".to_string(),
            Err(err) => {
                *all_healthy = false;
                err.to_string()
            }
        }
    }

    metrics::HEALTH_METRIC.add(&metrics::CONTEXT, 1, &[]);
    let store = &*state.store;
    let mut all_healthy = true;
    logger::info!("Deep health check was called");

    logger::debug!("Database health check begin");
    let database = describe(store.health_check_db().await, &mut all_healthy);
    logger::debug!("Database health check end");

    logger::debug!("Redis health check begin");
    let redis = describe(store.health_check_redis(store).await, &mut all_healthy);
    logger::debug!("Redis health check end");

    logger::debug!("Locker health check begin");
    let locker = describe(store.health_check_locker(&state).await, &mut all_healthy);
    logger::debug!("Locker health check end");

    let payload = RouterHealthCheckResponse {
        database,
        redis,
        locker,
    };
    let response = serde_json::to_string(&payload).unwrap_or_default();

    if all_healthy {
        services::http_response_json(response)
    } else {
        services::http_server_error_json_response(response)
    }
}
8 changes: 8 additions & 0 deletions crates/router/src/services/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,14 @@ pub fn http_response_json<T: body::MessageBody + 'static>(response: T) -> HttpRe
.body(response)
}

/// Builds an `Internal Server Error` response whose body is `response`,
/// labelled with the `application/json` content type.
pub fn http_server_error_json_response<T: body::MessageBody + 'static>(
    response: T,
) -> HttpResponse {
    let mut builder = HttpResponse::InternalServerError();
    builder.content_type(mime::APPLICATION_JSON);
    builder.body(response)
}

pub fn http_response_json_with_headers<T: body::MessageBody + 'static>(
response: T,
mut headers: Vec<(String, String)>,
Expand Down
2 changes: 2 additions & 0 deletions crates/router/src/services/api/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use router_env::tracing_actix_web::RequestId;
use super::{request::Maskable, Request};
use crate::{
configs::settings::{Locker, Proxy},
consts::LOCKER_HEALTH_CALL_PATH,
core::{
errors::{ApiClientError, CustomResult},
payments,
Expand Down Expand Up @@ -119,6 +120,7 @@ pub fn proxy_bypass_urls(locker: &Locker) -> Vec<String> {
format!("{locker_host_rs}/cards/add"),
format!("{locker_host_rs}/cards/retrieve"),
format!("{locker_host_rs}/cards/delete"),
format!("{locker_host_rs}{}", LOCKER_HEALTH_CALL_PATH),
format!("{locker_host}/card/addCard"),
format!("{locker_host}/card/getCard"),
format!("{locker_host}/card/deleteCard"),
Expand Down
50 changes: 50 additions & 0 deletions crates/storage_impl/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,53 @@ pub enum ConnectorError {
#[error("Missing 3DS redirection payload: {field_name}")]
MissingConnectorRedirectionPayload { field_name: &'static str },
}

/// Failure modes of the database health check.
#[derive(Debug, thiserror::Error)]
pub enum HealthCheckDBError {
/// The database could not be reached; also the variant a diesel
/// `DatabaseError` is converted into.
#[error("Error while connecting to database")]
DBError,
/// The write probe failed.
#[error("Error while writing to database")]
DBWriteError,
/// The read probe failed.
#[error("Error while reading element in the database")]
DBReadError,
/// The delete probe failed.
#[error("Error while deleting element in the database")]
DBDeleteError,
/// Any diesel error that has no dedicated variant here.
#[error("Unpredictable error occurred")]
UnknownError,
/// A diesel error related to transaction handling (rollback, nesting,
/// broken transaction manager).
#[error("Error in database transaction")]
TransactionError,
}

impl From<diesel::result::Error> for HealthCheckDBError {
fn from(error: diesel::result::Error) -> Self {
match error {
diesel::result::Error::DatabaseError(_, _) => Self::DBError,

diesel::result::Error::RollbackErrorOnCommit { .. }
| diesel::result::Error::RollbackTransaction
| diesel::result::Error::AlreadyInTransaction
| diesel::result::Error::NotInTransaction
| diesel::result::Error::BrokenTransactionManager => Self::TransactionError,

_ => Self::UnknownError,
}
}
}

/// Failure modes of the Redis health check, one per step of the probe.
#[derive(Debug, thiserror::Error)]
pub enum HealthCheckRedisError {
/// No Redis connection could be obtained.
#[error("Failed to establish Redis connection")]
RedisConnectionError,
/// Writing the probe key failed.
#[error("Failed to set key value in Redis")]
SetFailed,
/// Reading the probe key back failed.
#[error("Failed to get key value in Redis")]
GetFailed,
/// Removing the probe key failed.
#[error("Failed to delete key value in Redis")]
DeleteFailed,
}

/// Failure modes of the locker health check.
#[derive(Debug, Clone, thiserror::Error)]
pub enum HealthCheckLockerError {
/// The health request to the locker did not succeed.
#[error("Failed to establish Locker connection")]
FailedToCallLocker,
}
Loading