From fe44ce6558ce0b23e5c7dcf7c298b574b05cb305 Mon Sep 17 00:00:00 2001
From: Mouaad Aallam
Date: Wed, 11 Oct 2023 23:17:46 +0200
Subject: [PATCH] chore: add benchmark

---
Reviewer note (text between the "---" separator and the first "diff --git"
line is not part of the commit message or the diff):
  * Adds a ":benchmark" module that applies the kotlinx-benchmark and
    all-open plugins and registers a JVM target using JMH 1.37.
  * The all-open plugin is configured for the JMH @State annotation, which
    is the annotation carried by EncodeBenchmark.
  * EncodeBenchmark.setUp() reads its input text from unicode.org, so
    running the benchmark requires network access.

 benchmark/build.gradle.kts              | 39 +++++++++++++++++++
 .../ktoken/benchmark/EncodeBenchmark.kt | 29 ++++++++++++++
 build.gradle.kts                        |  3 +-
 gradle/libs.versions.toml               |  6 +++
 settings.gradle.kts                     |  1 +
 5 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/build.gradle.kts
 create mode 100644 benchmark/main/src/com/aallam/ktoken/benchmark/EncodeBenchmark.kt

diff --git a/benchmark/build.gradle.kts b/benchmark/build.gradle.kts
new file mode 100644
index 0000000..95d2b56
--- /dev/null
+++ b/benchmark/build.gradle.kts
@@ -0,0 +1,39 @@
+import kotlinx.benchmark.gradle.JvmBenchmarkTarget
+import kotlinx.benchmark.gradle.benchmark
+import org.jetbrains.kotlin.allopen.gradle.AllOpenExtension
+
+plugins {
+    java
+    kotlin("jvm")
+    kotlin("plugin.allopen")
+    id("org.jetbrains.kotlinx.benchmark")
+}
+
+sourceSets.all {
+    java.setSrcDirs(listOf("$name/src"))
+    resources.setSrcDirs(listOf("$name/resources"))
+}
+
+configure<AllOpenExtension> {
+    annotation("org.openjdk.jmh.annotations.State")
+}
+
+dependencies {
+    implementation(project(":ktoken"))
+    implementation(libs.kotlinx.benchmark)
+}
+
+benchmark {
+    configurations {
+        named("main") {
+            warmups = 1
+            iterations = 5
+        }
+    }
+    targets {
+        register("main") {
+            this as JvmBenchmarkTarget
+            jmhVersion = "1.37"
+        }
+    }
+}
diff --git a/benchmark/main/src/com/aallam/ktoken/benchmark/EncodeBenchmark.kt b/benchmark/main/src/com/aallam/ktoken/benchmark/EncodeBenchmark.kt
new file mode 100644
index 0000000..65871ef
--- /dev/null
+++ b/benchmark/main/src/com/aallam/ktoken/benchmark/EncodeBenchmark.kt
@@ -0,0 +1,29 @@
+package com.aallam.ktoken.benchmark
+
+import com.aallam.ktoken.Tokenizer
+import kotlinx.coroutines.runBlocking
+import org.openjdk.jmh.annotations.*
+import java.net.URL
+import java.nio.charset.StandardCharsets
+import java.util.*
+
+@State(Scope.Benchmark)
+@Fork(2)
+@BenchmarkMode(Mode.SingleShotTime)
+class EncodeBenchmark {
+    private lateinit var text: String
+    private lateinit var tokenizer: Tokenizer
+
+    @Setup
+    fun setUp() = runBlocking {
+        val url = URL("https://unicode.org/udhr/assemblies/full_all.txt")
+        Scanner(url.openStream(), StandardCharsets.UTF_8.name())
+            .use { scanner -> text = scanner.useDelimiter("\\A").next() }
+        tokenizer = Tokenizer.encodingForModel("gpt-4")
+    }
+
+    @Benchmark
+    fun encode(): Int {
+        return tokenizer.encode(text).size
+    }
+}
diff --git a/build.gradle.kts b/build.gradle.kts
index 27dabbf..c5341ad 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -3,12 +3,13 @@ import org.gradle.api.tasks.testing.logging.TestExceptionFormat
 import org.gradle.api.tasks.testing.logging.TestLogEvent.*
 import org.jetbrains.dokka.gradle.DokkaMultiModuleTask
 
-@Suppress("DSL_SCOPE_VIOLATION")
 plugins {
     alias(libs.plugins.kotlin.multiplaform) apply false
     alias(libs.plugins.kotlinx.binary.validator) apply false
     alias(libs.plugins.maven.publish) apply false
     alias(libs.plugins.spotless) apply false
+    alias(libs.plugins.kotlinx.benchmark) apply false
+    alias(libs.plugins.kotlin.allopen) apply false
     alias(libs.plugins.dokka)
 }
 
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index eb6a22e..fe22b44 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -5,6 +5,7 @@ ktor = "2.3.5"
 okio = "3.6.0"
 openai = "3.5.0"
 dataframe = "0.11.1"
+benchmark = "0.4.9"
 
 [libraries]
 # Coroutines
@@ -24,6 +25,9 @@ okio = { group = "com.squareup.okio", name = "okio", version.ref = "okio" }
 openai-client = { group = "com.aallam.openai", name = "openai-client", version.ref = "openai" }
 # Dataframe
 dataframe = { group = "org.jetbrains.kotlinx", name = "dataframe", version.ref = "dataframe" }
+# benchmark
+kotlinx-benchmark = { module = "org.jetbrains.kotlinx:kotlinx-benchmark-runtime", version.ref = "benchmark" }
+
 
 [plugins]
 kotlin-multiplaform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" }
@@ -31,3 +35,5 @@ kotlinx-binary-validator = { id = "org.jetbrains.kotlinx.binary-compatibility-va
 maven-publish = { id = "com.vanniktech.maven.publish", version = "0.25.3" }
 spotless = { id = "com.diffplug.gradle.spotless", version = "6.22.0" }
 dokka = { id = "org.jetbrains.dokka", version = "1.8.20" }
+kotlinx-benchmark = { id = "org.jetbrains.kotlinx.benchmark", version.ref = "benchmark" }
+kotlin-allopen = { id = "org.jetbrains.kotlin.plugin.allopen", version.ref = "kotlin" }
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 3362966..9f5da95 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -3,6 +3,7 @@ rootProject.name = "ktoken"
 include(":ktoken")
 include(":ktoken-bom")
 include(":sample")
+include(":benchmark")
 
 pluginManagement {
     repositories {