Skip to content

Commit

Permalink
fix(ktor): scalability for server (#559)
Browse files Browse the repository at this point in the history
* fix(ktor): scalability for server

* noticed slow processing in ktor when there is a high amount of load. The thread pool sizes (connectionGroupSize, workerGroupSize, and callGroupSize) are now dynamically calculated based on the available processors, ensuring optimal performance.

This setup will be 16, 32, 32 instead of 8, 8, 16 -> the old configuration assumed that application processing is more CPU-intensive than connection handling, which is not true according to our metrics.

checking the docs:
If /token processing is delayed, we can increase callGroupSize gradually to handle more concurrent requests.

consider:
install(IdleTimeout) {
    requestTimeoutMillis = 15000
    idleTimeoutMillis = 60000
}

to handle idle connections that are not being used.

*

* fix(server):  without setting limit for cpu, not overloading

we divide by 2

* fix(server): more robust handling of scale; ensure that the callGroupSize never exceeds the database maxConnectionPool,
which could otherwise cause connection starvation or exceed the database `max_connections` limit.

* set max_connections flag = 200
* increase the production pool size
* 10 pods * 20 connections = 200 connections (matches a database with max_connections = 200)

* fix(server): remove comment

* update(build): update deps

* update(metrics): max replicas is 12, so ensure the number of concurrent database connections matches replicas * pool_max
  • Loading branch information
ybelMekk authored Dec 12, 2024
1 parent 76c2261 commit 9715ea3
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 11 deletions.
9 changes: 8 additions & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ val kotestVersion = "5.9.1"
val kotlinLoggingVersion = "3.0.5"
val kotlinVersion = "2.0.21"
val kotliqueryVersion = "1.9.0"
val ktorVersion = "3.0.1"
val ktorVersion = "3.0.2"
val logbackVersion = "1.5.12"
val logstashLogbackEncoderVersion = "8.0"
val micrometerRegistryPrometheusVersion = "1.14.0"
Expand Down Expand Up @@ -54,6 +54,13 @@ repositories {
mavenCentral()
}


configurations.all {
resolutionStrategy {
force("org.apache.commons:commons-compress:1.26.0")
}
}

dependencies {
implementation(kotlin("stdlib"))
implementation("org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion")
Expand Down
3 changes: 3 additions & 0 deletions charts/templates/tokendings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ spec:
databases:
- name: tokendings
envVarPrefix: DB
flags:
- name: max_connections
value: "300"
ingresses:
- "{{- include "tokenx.tokendings.URL" . }}"
{{- if .Values.tokendings.mapSubjectTokenClaims }}
Expand Down
15 changes: 7 additions & 8 deletions src/main/kotlin/io/nais/security/oauth2/TokenExchangeApp.kt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import io.micrometer.prometheus.PrometheusConfig
import io.micrometer.prometheus.PrometheusMeterRegistry
import io.nais.security.oauth2.authentication.clientRegistrationAuth
import io.nais.security.oauth2.config.AppConfiguration
import io.nais.security.oauth2.config.HikariProperties
import io.nais.security.oauth2.config.configByProfile
import io.nais.security.oauth2.config.isNonProd
import io.nais.security.oauth2.metrics.Metrics
Expand Down Expand Up @@ -81,15 +82,17 @@ fun main() {

fun server(): EmbeddedServer<NettyApplicationEngine, NettyApplicationEngine.Configuration> {
val config = configByProfile()
val processors = Runtime.getRuntime().availableProcessors()
val maxConnectionPool = if (isNonProd()) HikariProperties.MAX_POOL_SIZE_NON_PROD else HikariProperties.MAX_POOL_SIZE_PROD
return embeddedServer(
Netty,
configure = {
connector {
port = config.serverProperties.port
}
connectionGroupSize = 8
workerGroupSize = 8
callGroupSize = 16
connectionGroupSize = maxOf(1, processors / 2)
workerGroupSize = processors
callGroupSize = maxOf(1, minOf(processors * 2, maxConnectionPool))
},
module = {
tokenExchangeApp(config, DefaultRouting(config))
Expand Down Expand Up @@ -149,11 +152,7 @@ fun Application.tokenExchangeApp(config: AppConfiguration, routing: ApiRouting)
call.respondWithError(cause, includeErrorDetails)
}

is BadRequestException -> {
call.respond(HttpStatusCode.BadRequest, "invalid request content")
}

is JsonProcessingException -> {
is BadRequestException, is JsonProcessingException -> {
call.respond(HttpStatusCode.BadRequest, "invalid request content")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ object HikariProperties {
const val IDLE_TIMEOUT_PROD = 300000L
const val CONNECTION_TIMEOUT_PROD = 5000L
const val MAX_LIFETIME_PROD = 1800000L
const val MAX_POOL_SIZE_PROD = 10
const val MIN_IDLE_CONNECTIONS_PROD = 5
const val MAX_POOL_SIZE_PROD = 20
const val MIN_IDLE_CONNECTIONS_PROD = 10

// Non-production-specific
const val IDLE_TIMEOUT_NON_PROD = 600000L
Expand Down

0 comments on commit 9715ea3

Please sign in to comment.