From 17d6d5987eaf11add3e3f9736c7ef9078782f0be Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Fri, 21 Apr 2023 13:44:19 +0200 Subject: [PATCH 1/9] Add Config --- src/commonMain/kotlin/com/xebia/functional/config/config.kt | 4 ++++ src/commonMain/kotlin/com/xebia/functional/config/env.kt | 4 ++++ .../kotlin/com/xebia/functional/config/ConfigSpec.kt | 4 ++++ src/jsMain/kotlin/com/xebia/functional/config/getenv.kt | 2 ++ src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt | 2 ++ src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt | 2 ++ 6 files changed, 18 insertions(+) create mode 100644 src/commonMain/kotlin/com/xebia/functional/config/config.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/config/env.kt create mode 100644 src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt create mode 100644 src/jsMain/kotlin/com/xebia/functional/config/getenv.kt create mode 100644 src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt create mode 100644 src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt diff --git a/src/commonMain/kotlin/com/xebia/functional/config/config.kt b/src/commonMain/kotlin/com/xebia/functional/config/config.kt new file mode 100644 index 000000000..8531464ba --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/config/config.kt @@ -0,0 +1,4 @@ +package com.xebia.functional.config + +class config { +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/config/env.kt b/src/commonMain/kotlin/com/xebia/functional/config/env.kt new file mode 100644 index 000000000..4213496f6 --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/config/env.kt @@ -0,0 +1,4 @@ +package com.xebia.functional.config + +class env { +} \ No newline at end of file diff --git a/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt b/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt new file mode 100644 index 000000000..830353448 --- /dev/null +++ b/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt @@ -0,0 +1,4 @@ +package com.xebia.functional.config + +class ConfigSpec { +} \ No newline at end of file diff --git a/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt b/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt new file mode 100644 index 000000000..6201607a7 --- /dev/null +++ b/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt @@ -0,0 +1,2 @@ +package com.xebia.functional.config + diff --git a/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt b/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt new file mode 100644 index 000000000..6201607a7 --- /dev/null +++ b/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt @@ -0,0 +1,2 @@ +package com.xebia.functional.config + diff --git a/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt b/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt new file mode 100644 index 000000000..6201607a7 --- /dev/null +++ b/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt @@ -0,0 +1,2 @@ +package com.xebia.functional.config + From 931a7a4e143bcab30e9305a820b86130d600a28e Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Wed, 26 Apr 2023 15:37:35 +0200 Subject: [PATCH 2/9] WIP --- build.gradle.kts | 12 ++++ gradle/libs.versions.toml | 3 + .../xebia/functional/embeddings/Embeddings.kt | 10 +++ .../functional/embeddings/OpenAIEmbeddings.kt | 52 +++++++++++++++ .../com/xebia/functional/llm/openai/models.kt | 13 ++++ .../kotlin/com/xebia/functional/model.kt | 3 + .../vectorstores/InMemoryVectorStore.kt | 54 ++++++++++++++++ .../functional/vectorstores/VectorStore.kt | 45 +++++++++++++ .../xebia/functional/vectorstores/postgres.kt | 54 ++++++++++++++++ .../com/xebia/functional/VectorStore.sq | 63 +++++++++++++++++++ .../com/xebia/functional/JDBCVectorStore.kt | 27 ++++++++ 11 files changed, 336 insertions(+) create mode 100644 src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/model.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt create mode 100644 src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt create mode 100644 src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq create mode 100644 src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt diff --git a/build.gradle.kts b/build.gradle.kts index 49de77cc2..294d4a344 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -12,6 +12,7 @@ plugins { alias(libs.plugins.kotlin.multiplatform) alias(libs.plugins.spotless) alias(libs.plugins.kotlinx.serialization) + alias(libs.plugins.sqldelight) } java { @@ -51,6 +52,7 @@ kotlin { implementation(libs.kotlinx.serialization.json) implementation(libs.bundles.ktor.client) implementation(libs.okio) + implementation("app.softwork:kotlinx-uuid-core:0.0.18") } } @@ -76,3 +78,13 @@ spotless { ktfmt().googleStyle() } } + +sqldelight { + databases { + create("SqlDelightVectorStore") { + packageName.set("com.xebia.functional") + dialect(libs.postgres.get()) + } + } + linkSqlite.set(false) +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 60bd1c084..17b684c60 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,6 +8,7 @@ spotless = "6.18.0" okio = "3.3.0" kotest = "5.5.4" kotest-arrow = "1.3.0" +sqldelight="2.0.0-alpha05" [libraries] arrow-fx = { module = "io.arrow-kt:arrow-fx-coroutines", version.ref = "arrow" } @@ -24,6 +25,7 @@ kotest-framework = { module = "io.kotest:kotest-framework-engine", version.ref = kotest-property = { module = "io.kotest:kotest-property", version.ref = "kotest" } kotest-junit5 = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" } kotest-assertions-arrow = { module = "io.kotest.extensions:kotest-assertions-arrow", version.ref = "kotest-arrow" } +postgres = { module = "app.cash.sqldelight:postgresql-dialect", version.ref="sqldelight" } [bundles] ktor-client = [ @@ -36,3 +38,4 @@ ktor-client = [ kotlin-multiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } kotlinx-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" } spotless = { id = "com.diffplug.spotless", version.ref = "spotless" } +sqldelight = { id = "app.cash.sqldelight", version.ref = "sqldelight" } diff --git a/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt b/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt new file mode 100644 index 000000000..936e7019a --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt @@ -0,0 +1,10 @@ +package com.xebia.functional.embeddings + +import com.xebia.functional.llm.openai.RequestConfig + +data class Embedding(val data: List) + +interface Embeddings { + fun embedDocuments(texts: List, chunkSize: Int?, config: RequestConfig): List + fun embedQuery(text: String, config: RequestConfig): List +} diff --git a/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt new file mode 100644 index 000000000..f945a59b4 --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt @@ -0,0 +1,52 @@ +package com.xebia.functional.embeddings + +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.flow.toList +import mu.KotlinLogging +import kotlin.time.ExperimentalTime +import kotlin.time.seconds + +@ExperimentalTime +class OpenAIEmbeddings(private val config: OpenAIConfig, private val oaiClient: OpenAIClient, private val logger: KotlinLogging) : Embeddings { + + override suspend fun embedQuery(text: String, rc: RequestConfig): List { + return if (text.isNotEmpty()) embedDocuments(listOf(text), null, rc) + else emptyList() + } + + override suspend fun embedDocuments(texts: List, chunkSize: Int?, rc: RequestConfig): List { + return chunkedEmbedDocuments(texts, chunkSize ?: config.chunkSize, rc) + } + + private suspend fun chunkedEmbedDocuments(texts: List, chunkSize: Int, rc: RequestConfig): List { + if (texts.isEmpty()) return emptyList() + + val batches = texts.chunked(chunkSize) + val embeddings = mutableListOf() + batches.forEach { batch -> + val vectors = embedWithRetry(batch, rc) + embeddings.addAll(vectors) + } + return embeddings + } + + private suspend fun embedWithRetry(texts: List, rc: RequestConfig): List { + val result = retryingOnAllErrors( + policy = limitRetries(config.maxRetries) + exponentialBackoff(config.backoff), + onError = ::logError + ) { + oaiClient.createEmbeddings(EmbeddingRequest(rc.model.name, texts, rc.user.asString)) + } + return result.data.map { Embedding(it.embedding) } + } + + private suspend fun logError(err: Throwable, details: RetryDetails): Unit = when (details) { + is WillDelayAndRetry -> { + logger.warn { "Open AI call failed. So far we have retried ${details.retriesSoFar} times." } + } + is GivingUp -> { + logger.warn { "Open AI call failed. Giving up after ${details.totalRetries} retries" } + } + } +} diff --git a/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt b/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt index 9e480df88..90c802bc7 100644 --- a/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt +++ b/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt @@ -1,8 +1,21 @@ package com.xebia.functional.llm.openai +import kotlin.jvm.JvmInline import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable +enum class EmbeddingModel(name: String) { + TextEmbeddingAda002("text-embedding-ada-002") +} + +data class RequestConfig(val model: EmbeddingModel, val user: User) { + companion object { + @JvmInline + value class User(val id: String) + } +} + + @Serializable data class CompletionChoice(val text: String, val index: Int, val finishReason: String) diff --git a/src/commonMain/kotlin/com/xebia/functional/model.kt b/src/commonMain/kotlin/com/xebia/functional/model.kt new file mode 100644 index 000000000..532e9b5e0 --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/model.kt @@ -0,0 +1,3 @@ +package com.xebia.functional + + data class Document(val content: String) \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt new file mode 100644 index 000000000..bd4787554 --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt @@ -0,0 +1,54 @@ +//package com.xebia.functional.vectorstores +// +//import com.xebia.functional.Document +//import com.xebia.functional.embeddings.Embedding +//import kotlin.math.sqrt +// +//class InMemoryVectorStore : VectorStore { +// +// private val documents = mutableMapOf() +// +// override fun addTexts(texts: List): List { +// val documentVectors = texts.map { embeddings.embedText(it) } +// val ids = documentVectors.indices.map { it + 1 }.toList() +// documents.putAll(ids.zip(documentVectors).toMap()) +// return ids +// } +// +// override fun addDocuments(documents: List): List { +// val documentVectors = documents.map { embeddings.embedText(it.content) } +// val ids = documentVectors.indices.map { it + 1 }.toList() +// this.documents.putAll(ids.zip(documentVectors).toMap()) +// return ids +// } +// +// override fun similaritySearch(query: String, limit: Int): List { +// val queryVector = embeddings.embedText(query) +// val results = documents.toList().map { (id, vector) -> +// Pair(id, cosineSimilarity(vector, queryVector)) +// }.sortedByDescending { it.second }.take(limit) +// return results.map { Document(it.first.toString(), "") } +// } +// +// override fun similaritySearchByVector(embedding: Embedding, k: Int): List { +// val results = documents.map { (id, vector) -> +// Pair(id, cosineSimilarity(vector, embedding.data)) +// }.sortedByDescending { it.second }.take(k) +// return results.map { Document(it.first.toString(), "") } +// } +// +// private fun cosineSimilarity(v1: List, v2: List): Float { +// val freq1 = v1.groupingBy { it.id }.eachCount() +// val freq2 = v2.groupingBy { it.id }.eachCount() +// +// val dotProduct = freq1.filterKeys { freq2.containsKey(it) } +// .map { it.value * freq2.getValue(it.key) } +// .sum() +// +// val magnitude1 = sqrt(freq1.values.sumOf { it * it }.toDouble()).toFloat() +// val magnitude2 = sqrt(freq2.values.sumOf { it * it }.toDouble()).toFloat() +// +// return if (magnitude1 == 0f || magnitude2 == 0f) 0f +// else dotProduct / (magnitude1 * magnitude2) +// } +//} diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt new file mode 100644 index 000000000..71f7308f8 --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt @@ -0,0 +1,45 @@ +package com.xebia.functional.vectorstores + +import com.xebia.functional.Document +import com.xebia.functional.embeddings.Embedding +import kotlin.jvm.JvmInline +import kotlinx.uuid.UUID + +@JvmInline +value class DocumentVectorId(val id: UUID) + +interface VectorStore { + /** + * Add texts to the vector store after running them through the embeddings + * + * @param texts list of text to add to the vector store + * @return a list of IDs from adding the texts to the vector store + */ + fun addTexts(texts: List): List + + /** + * Add documents to the vector store after running them through the embeddings + * + * @param documents list of Documents to add to the vector store + * @return a list of IDs from adding the documents to the vector store + */ + fun addDocuments(documents: List): List + + /** + * Return the docs most similar to the query + * + * @param query text to use to search for similar documents + * @param limit number of documents to return + * @return a list of Documents most similar to query + */ + fun similaritySearch(query: String, limit: Int): List + + /** + * Return the docs most similar to the embedding + * + * @param embedding embedding vector to use to search for similar documents + * @param k number of documents to return + * @return list of Documents most similar to the embedding + */ + fun similaritySearchByVector(embedding: Embedding, k: Int): List +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt new file mode 100644 index 000000000..d470c27fc --- /dev/null +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt @@ -0,0 +1,54 @@ +package com.xebia.functional.vectorstores + +val createCollections = """CREATE TABLE langchain4k_collections ( + uuid TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL + );""".trimIndent() + +val createEmbeddings = """CREATE TABLE langchain4s_embeddings ( + uuid TEXT PRIMARY KEY, + collection_id TEXT REFERENCES langchain4s_collections(uuid), + embedding BLOB, + content TEXT + );""".trimIndent() + +val addVectorExtension = "CREATE EXTENSION IF NOT EXISTS vector;" + +val createCollectionsTable = """CREATE TABLE IF NOT EXISTS langchain4s_collections ( + uuid TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL + );""".trimIndent() + +val createEmbeddingTable = """CREATE TABLE IF NOT EXISTS langchain4s_embeddings ( + uid TEXT PRIMARY KEY, + ollection_id TEXT REFERENCES langchain4s_collections(uuid), + mbedding BLOB, + ontent TEXT + );""".trimIndent() + +val addNewCollection = """INSERT INTO langchain4s_collections(uuid, name) + VALUES (?, ?) + ON CONFLICT DO NOTHING;""".trimIndent() + +val deleteCollection = """DELETE FROM langchain4s_collections + WHERE uuid = ?;""".trimIndent() + +val getCollection = """SELECT * FROM langchain4s_collections + WHERE name = ?;""".trimIndent() + +val getCollectionById = """SELECT * FROM langchain4s_collections + WHERE uuid = ?;""".trimIndent() + +val addNewDocument = """INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) + VALUES (?, ?, ?, ?);""".trimIndent() + +val deleteCollectionDocs = """DELETE FROM langchain4s_embeddings + WHERE collection_id = ?;""".trimIndent() + +val addNewText = """INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) + VALUES (?, ?, ?, ?);""".trimIndent() + +val searchSimilarDocument = """SELECT content FROM langchain4s_embeddings + WHERE collection_id = ? + ORDER BY embedding || ?::vector + LIMIT ?;""".trimIndent() diff --git a/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq b/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq new file mode 100644 index 000000000..dad81b3b1 --- /dev/null +++ b/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq @@ -0,0 +1,63 @@ +CREATE TABLE langchain4s_collections ( + uuid TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL +); + +CREATE TABLE langchain4s_embeddings ( + uuid TEXT PRIMARY KEY, + collection_id TEXT REFERENCES langchain4s_collections(uuid), + embedding BLOB, + content TEXT +); + +addVectorExtension: +CREATE EXTENSION IF NOT EXISTS vector; + +createCollectionsTable: +CREATE TABLE IF NOT EXISTS langchain4s_collections ( + uuid TEXT PRIMARY KEY, + name TEXT UNIQUE NOT NULL +); + +createEmbeddingTable: +CREATE TABLE IF NOT EXISTS langchain4s_embeddings ( + uuid TEXT PRIMARY KEY, + collection_id TEXT REFERENCES langchain4s_collections(uuid), + embedding BLOB, + content TEXT +); + +addNewCollection: +INSERT INTO langchain4s_collections(uuid, name) +VALUES (?, ?) +ON CONFLICT DO NOTHING; + +deleteCollection: +DELETE FROM langchain4s_collections +WHERE uuid = ?; + +getCollection: +SELECT * FROM langchain4s_collections +WHERE name = ?; + +getCollectionById: +SELECT * FROM langchain4s_collections +WHERE uuid = ?; + +addNewDocument: +INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) +VALUES (?, ?, ?, ?); + +deleteCollectionDocs: +DELETE FROM langchain4s_embeddings +WHERE collection_id = ?; + +addNewText: +INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) +VALUES (?, ?, ?, ?); + +searchSimilarDocument: +SELECT content FROM langchain4s_embeddings +WHERE collection_id = ? +ORDER BY embedding || ?::vector +LIMIT ?; \ No newline at end of file diff --git a/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt b/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt new file mode 100644 index 000000000..865955839 --- /dev/null +++ b/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt @@ -0,0 +1,27 @@ +package com.xebia.functional + +import com.xebia.functional.embeddings.Embedding +import com.xebia.functional.vectorstores.DocumentVectorId +import com.xebia.functional.vectorstores.VectorStore +import javax.sql.DataSource + +class JDBCVectorStore private constructor( + val dataSource: DataSource +): VectorStore { + override fun addTexts(texts: List): List { + TODO("Not yet implemented") + } + + override fun addDocuments(documents: List): List { + TODO("Not yet implemented") + } + + override fun similaritySearch(query: String, limit: Int): List { + TODO("Not yet implemented") + } + + override fun similaritySearchByVector(embedding: Embedding, k: Int): List { + TODO("Not yet implemented") + } + +} \ No newline at end of file From 633bcb42b57f93421fecf8b80c68dc743961b96f Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 15:01:49 +0200 Subject: [PATCH 3/9] Write tests, and make fixes --- build.gradle.kts | 28 +++-- gradle/libs.versions.toml | 16 ++- .../com/xebia/functional/config/config.kt | 4 - .../kotlin/com/xebia/functional/config/env.kt | 4 - .../xebia/functional/embeddings/Embeddings.kt | 8 +- .../functional/embeddings/OpenAIEmbeddings.kt | 91 ++++++++------- .../kotlin/com/xebia/functional/env/config.kt | 6 +- .../com/xebia/functional/llm/openai/models.kt | 2 +- .../functional/vectorstores/VectorStore.kt | 8 +- .../xebia/functional/vectorstores/postgres.kt | 45 +++++--- .../com/xebia/functional/VectorStore.sq | 63 ---------- .../com/xebia/functional/embeddings/Mock.kt | 23 ++++ .../kotlin/com/xebia/functional/JDBCSyntax.kt | 108 ++++++++++++++++++ .../com/xebia/functional/JDBCVectorStore.kt | 27 ----- .../com/xebia/functional/PGVectorStore.kt | 97 ++++++++++++++++ .../com/xebia/functional/PGVectorStoreSpec.kt | 91 +++++++++++++++ 16 files changed, 429 insertions(+), 192 deletions(-) delete mode 100644 src/commonMain/kotlin/com/xebia/functional/config/config.kt delete mode 100644 src/commonMain/kotlin/com/xebia/functional/config/env.kt delete mode 100644 src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq create mode 100644 src/commonTest/kotlin/com/xebia/functional/embeddings/Mock.kt create mode 100644 src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt delete mode 100644 src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt create mode 100644 src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt create mode 100644 src/jvmTest/kotlin/com/xebia/functional/PGVectorStoreSpec.kt diff --git a/build.gradle.kts b/build.gradle.kts index 294d4a344..09688fe2d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -12,18 +12,17 @@ plugins { alias(libs.plugins.kotlin.multiplatform) alias(libs.plugins.spotless) alias(libs.plugins.kotlinx.serialization) - alias(libs.plugins.sqldelight) } java { - sourceCompatibility = JavaVersion.VERSION_11 - targetCompatibility = JavaVersion.VERSION_11 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } kotlin { jvm { compilations.all { - kotlinOptions.jvmTarget = JavaVersion.VERSION_11.majorVersion + kotlinOptions.jvmTarget = JavaVersion.VERSION_17.majorVersion } withJava() testRuns["test"].executionTask.configure { @@ -52,7 +51,8 @@ kotlin { implementation(libs.kotlinx.serialization.json) implementation(libs.bundles.ktor.client) implementation(libs.okio) - implementation("app.softwork:kotlinx-uuid-core:0.0.18") + implementation(libs.uuid) + implementation(libs.klogging) } } @@ -65,9 +65,17 @@ kotlin { implementation(libs.kotest.assertions.arrow) } } + val jvmMain by getting { + dependencies { + implementation(libs.hikari) + implementation(libs.postgresql) + } + } val jvmTest by getting { dependencies { implementation(libs.kotest.junit5) + implementation(libs.kotest.testcontainers) + implementation(libs.testcontainers.postgresql) } } } @@ -78,13 +86,3 @@ spotless { ktfmt().googleStyle() } } - -sqldelight { - databases { - create("SqlDelightVectorStore") { - packageName.set("com.xebia.functional") - dialect(libs.postgres.get()) - } - } - linkSqlite.set(false) -} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 17b684c60..d611e920e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -7,8 +7,13 @@ ktor = "2.2.2" spotless = "6.18.0" okio = "3.3.0" kotest = "5.5.4" +kotest-testcontainers = "1.3.4" kotest-arrow = "1.3.0" -sqldelight="2.0.0-alpha05" +klogging = "4.0.0-beta-22" +uuid = "0.0.18" +postgresql = "42.5.1" +testcontainers = "1.17.6" +hikari = "5.0.1" [libraries] arrow-fx = { module = "io.arrow-kt:arrow-fx-coroutines", version.ref = "arrow" } @@ -24,8 +29,14 @@ kotest-assertions = { module = "io.kotest:kotest-assertions-core", version.ref = kotest-framework = { module = "io.kotest:kotest-framework-engine", version.ref = "kotest" } kotest-property = { module = "io.kotest:kotest-property", version.ref = "kotest" } kotest-junit5 = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" } +kotest-testcontainers = { module = "io.kotest.extensions:kotest-extensions-testcontainers", version.ref = "kotest-testcontainers" } kotest-assertions-arrow = { module = "io.kotest.extensions:kotest-assertions-arrow", version.ref = "kotest-arrow" } -postgres = { module = "app.cash.sqldelight:postgresql-dialect", version.ref="sqldelight" } +uuid = { module = "app.softwork:kotlinx-uuid-core", version.ref = "uuid" } +klogging = { module = "io.github.oshai:kotlin-logging", version.ref = "klogging" } + +hikari = { module = "com.zaxxer:HikariCP", version.ref = "hikari" } +postgresql = { module = "org.postgresql:postgresql", version.ref = "postgresql" } +testcontainers-postgresql = { module = "org.testcontainers:postgresql", version.ref = "testcontainers" } [bundles] ktor-client = [ @@ -38,4 +49,3 @@ ktor-client = [ kotlin-multiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } kotlinx-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" } spotless = { id = "com.diffplug.spotless", version.ref = "spotless" } -sqldelight = { id = "app.cash.sqldelight", version.ref = "sqldelight" } diff --git a/src/commonMain/kotlin/com/xebia/functional/config/config.kt b/src/commonMain/kotlin/com/xebia/functional/config/config.kt deleted file mode 100644 index 8531464ba..000000000 --- a/src/commonMain/kotlin/com/xebia/functional/config/config.kt +++ /dev/null @@ -1,4 +0,0 @@ -package com.xebia.functional.config - -class config { -} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/config/env.kt b/src/commonMain/kotlin/com/xebia/functional/config/env.kt deleted file mode 100644 index 4213496f6..000000000 --- a/src/commonMain/kotlin/com/xebia/functional/config/env.kt +++ /dev/null @@ -1,4 +0,0 @@ -package com.xebia.functional.config - -class env { -} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt b/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt index 936e7019a..49c074bb3 100644 --- a/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt +++ b/src/commonMain/kotlin/com/xebia/functional/embeddings/Embeddings.kt @@ -2,9 +2,11 @@ package com.xebia.functional.embeddings import com.xebia.functional.llm.openai.RequestConfig -data class Embedding(val data: List) +data class Embedding(val data: List) interface Embeddings { - fun embedDocuments(texts: List, chunkSize: Int?, config: RequestConfig): List - fun embedQuery(text: String, config: RequestConfig): List + suspend fun embedDocuments(texts: List, chunkSize: Int?, requestConfig: RequestConfig): List + suspend fun embedQuery(text: String, requestConfig: RequestConfig): List + + companion object } diff --git a/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt index f945a59b4..f80743281 100644 --- a/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt +++ b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt @@ -1,52 +1,51 @@ package com.xebia.functional.embeddings -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.flow -import kotlinx.coroutines.flow.toList -import mu.KotlinLogging +import arrow.fx.coroutines.parMap +import arrow.resilience.retry +import com.xebia.functional.env.OpenAIConfig +import com.xebia.functional.llm.openai.EmbeddingRequest +import com.xebia.functional.llm.openai.OpenAIClient +import com.xebia.functional.llm.openai.RequestConfig +import io.github.oshai.KLogger import kotlin.time.ExperimentalTime -import kotlin.time.seconds @ExperimentalTime -class OpenAIEmbeddings(private val config: OpenAIConfig, private val oaiClient: OpenAIClient, private val logger: KotlinLogging) : Embeddings { - - override suspend fun embedQuery(text: String, rc: RequestConfig): List { - return if (text.isNotEmpty()) embedDocuments(listOf(text), null, rc) - else emptyList() - } - - override suspend fun embedDocuments(texts: List, chunkSize: Int?, rc: RequestConfig): List { - return chunkedEmbedDocuments(texts, chunkSize ?: config.chunkSize, rc) - } - - private suspend fun chunkedEmbedDocuments(texts: List, chunkSize: Int, rc: RequestConfig): List { - if (texts.isEmpty()) return emptyList() - - val batches = texts.chunked(chunkSize) - val embeddings = mutableListOf() - batches.forEach { batch -> - val vectors = embedWithRetry(batch, rc) - embeddings.addAll(vectors) - } - return embeddings - } - - private suspend fun embedWithRetry(texts: List, rc: RequestConfig): List { - val result = retryingOnAllErrors( - policy = limitRetries(config.maxRetries) + exponentialBackoff(config.backoff), - onError = ::logError - ) { - oaiClient.createEmbeddings(EmbeddingRequest(rc.model.name, texts, rc.user.asString)) - } - return result.data.map { Embedding(it.embedding) } - } - - private suspend fun logError(err: Throwable, details: RetryDetails): Unit = when (details) { - is WillDelayAndRetry -> { - logger.warn { "Open AI call failed. So far we have retried ${details.retriesSoFar} times." } - } - is GivingUp -> { - logger.warn { "Open AI call failed. Giving up after ${details.totalRetries} retries" } +class OpenAIEmbeddings( + private val config: OpenAIConfig, + private val oaiClient: OpenAIClient, + private val logger: KLogger +) : Embeddings { + + override suspend fun embedDocuments( + texts: List, + chunkSize: Int?, + requestConfig: RequestConfig + ): List = + chunkedEmbedDocuments(texts, chunkSize ?: config.chunkSize, requestConfig) + + override suspend fun embedQuery(text: String, requestConfig: RequestConfig): List = + if (text.isNotEmpty()) embedDocuments(listOf(text), null, requestConfig) else emptyList() + + private suspend fun chunkedEmbedDocuments( + texts: List, + chunkSize: Int, + requestConfig: RequestConfig + ): List = + if (texts.isEmpty()) emptyList() + else texts.chunked(chunkSize) + .parMap { withRetry(it, requestConfig) } + .flatten() + + private suspend fun withRetry(texts: List, requestConfig: RequestConfig): List = + kotlin.runCatching { + config.retryConfig.schedule() + .log { retriesSoFar, _ -> logger.warn { "Open AI call failed. So far we have retried $retriesSoFar times." } } + .retry { + oaiClient.createEmbeddings(EmbeddingRequest(requestConfig.model.name, texts, requestConfig.user.id)) + .data.map { Embedding(it.embedding) } + } + }.getOrElse { + logger.warn { "Open AI call failed. Giving up after ${config.retryConfig.maxRetries} retries" } + throw it } - } -} +} \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/env/config.kt b/src/commonMain/kotlin/com/xebia/functional/env/config.kt index 9cd8e6897..0f77dd465 100644 --- a/src/commonMain/kotlin/com/xebia/functional/env/config.kt +++ b/src/commonMain/kotlin/com/xebia/functional/env/config.kt @@ -17,11 +17,9 @@ data class Env(val openAI: OpenAIConfig, val huggingFace: HuggingFaceConfig) data class OpenAIConfig(val token: String, val chunkSize: Int, val retryConfig: RetryConfig) data class RetryConfig(val backoff: Duration, val maxRetries: Long) { - fun schedule(): Schedule = + fun schedule(): Schedule = Schedule.recurs(maxRetries) - .and(Schedule.exponential(backoff)) - .jittered(0.75, 1.25) - .map { } + .zipLeft(Schedule.exponential(backoff).jittered(0.75, 1.25)) } data class HuggingFaceConfig(val token: String, val baseUrl: KUrl) diff --git a/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt b/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt index 90c802bc7..fc8316380 100644 --- a/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt +++ b/src/commonMain/kotlin/com/xebia/functional/llm/openai/models.kt @@ -51,7 +51,7 @@ data class EmbeddingResult( ) @Serializable -class Embedding(val `object`: String, val embedding: List, val index: Int) +class Embedding(val `object`: String, val embedding: List, val index: Int) @Serializable data class Usage( diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt index 71f7308f8..cfd7861ec 100644 --- a/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt @@ -15,7 +15,7 @@ interface VectorStore { * @param texts list of text to add to the vector store * @return a list of IDs from adding the texts to the vector store */ - fun addTexts(texts: List): List + suspend fun addTexts(texts: List): List /** * Add documents to the vector store after running them through the embeddings @@ -23,7 +23,7 @@ interface VectorStore { * @param documents list of Documents to add to the vector store * @return a list of IDs from adding the documents to the vector store */ - fun addDocuments(documents: List): List + suspend fun addDocuments(documents: List): List /** * Return the docs most similar to the query @@ -32,7 +32,7 @@ interface VectorStore { * @param limit number of documents to return * @return a list of Documents most similar to query */ - fun similaritySearch(query: String, limit: Int): List + suspend fun similaritySearch(query: String, limit: Int): List /** * Return the docs most similar to the embedding @@ -41,5 +41,5 @@ interface VectorStore { * @param k number of documents to return * @return list of Documents most similar to the embedding */ - fun similaritySearchByVector(embedding: Embedding, k: Int): List + suspend fun similaritySearchByVector(embedding: Embedding, limit: Int): List } \ No newline at end of file diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt index d470c27fc..c95904a01 100644 --- a/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt @@ -1,54 +1,63 @@ package com.xebia.functional.vectorstores +import kotlinx.uuid.UUID + +data class PGCollection(val uuid: UUID, val collectionName: String) + +enum class PGDistanceStrategy(val strategy: String) { + Euclidean("<->"), InnerProduct("<#>"), CosineDistance("<=>") +} + val createCollections = """CREATE TABLE langchain4k_collections ( uuid TEXT PRIMARY KEY, name TEXT UNIQUE NOT NULL );""".trimIndent() -val createEmbeddings = """CREATE TABLE langchain4s_embeddings ( +val createEmbeddings = """CREATE TABLE langchain4k_embeddings ( uuid TEXT PRIMARY KEY, - collection_id TEXT REFERENCES langchain4s_collections(uuid), + collection_id TEXT REFERENCES langchain4k_collections(uuid), embedding BLOB, content TEXT );""".trimIndent() val addVectorExtension = "CREATE EXTENSION IF NOT EXISTS vector;" -val createCollectionsTable = """CREATE TABLE IF NOT EXISTS langchain4s_collections ( +val createCollectionsTable = """CREATE TABLE IF NOT EXISTS langchain4k_collections ( uuid TEXT PRIMARY KEY, name TEXT UNIQUE NOT NULL );""".trimIndent() -val createEmbeddingTable = """CREATE TABLE IF NOT EXISTS langchain4s_embeddings ( - uid TEXT PRIMARY KEY, - ollection_id TEXT REFERENCES langchain4s_collections(uuid), - mbedding BLOB, - ontent TEXT +fun createEmbeddingTable(vectorSize: Int) = """CREATE TABLE IF NOT EXISTS langchain4k_embeddings ( + uuid TEXT PRIMARY KEY, + collection_id TEXT REFERENCES langchain4k_collections(uuid), + embedding vector($vectorSize), + content TEXT );""".trimIndent() -val addNewCollection = """INSERT INTO langchain4s_collections(uuid, name) +val addNewCollection = """INSERT INTO langchain4k_collections(uuid, name) VALUES (?, ?) ON CONFLICT DO NOTHING;""".trimIndent() -val deleteCollection = """DELETE FROM langchain4s_collections +val deleteCollection = """DELETE FROM langchain4k_collections WHERE uuid = ?;""".trimIndent() -val getCollection = """SELECT * FROM langchain4s_collections +val getCollection = """SELECT * FROM langchain4k_collections WHERE name = ?;""".trimIndent() -val getCollectionById = """SELECT * FROM langchain4s_collections +val getCollectionById = """SELECT * FROM langchain4k_collections WHERE uuid = ?;""".trimIndent() -val addNewDocument = """INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) +val addNewDocument = """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) VALUES (?, ?, ?, ?);""".trimIndent() -val deleteCollectionDocs = """DELETE FROM langchain4s_embeddings +val deleteCollectionDocs = """DELETE FROM langchain4k_embeddings WHERE collection_id = ?;""".trimIndent() -val addNewText = """INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) - VALUES (?, ?, ?, ?);""".trimIndent() +val addNewText = """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) + VALUES (?, ?, ?::vector, ?);""".trimIndent() -val searchSimilarDocument = """SELECT content FROM langchain4s_embeddings +fun searchSimilarDocument(distance: PGDistanceStrategy) = """SELECT content FROM langchain4k_embeddings WHERE collection_id = ? - ORDER BY embedding || ?::vector + ORDER BY embedding + ${distance.strategy} ?::vector LIMIT ?;""".trimIndent() diff --git a/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq b/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq deleted file mode 100644 index dad81b3b1..000000000 --- a/src/commonMain/sqldelight/com/xebia/functional/VectorStore.sq +++ /dev/null @@ -1,63 +0,0 @@ -CREATE TABLE langchain4s_collections ( - uuid TEXT PRIMARY KEY, - name TEXT UNIQUE NOT NULL -); - -CREATE TABLE langchain4s_embeddings ( - uuid TEXT PRIMARY KEY, - collection_id TEXT REFERENCES langchain4s_collections(uuid), - embedding BLOB, - content TEXT -); - -addVectorExtension: -CREATE EXTENSION IF NOT EXISTS vector; - -createCollectionsTable: -CREATE TABLE IF NOT EXISTS langchain4s_collections ( - uuid TEXT PRIMARY KEY, - name TEXT UNIQUE NOT NULL -); - -createEmbeddingTable: -CREATE TABLE IF NOT EXISTS langchain4s_embeddings ( - uuid TEXT PRIMARY KEY, - collection_id TEXT REFERENCES langchain4s_collections(uuid), - embedding BLOB, - content TEXT -); - -addNewCollection: -INSERT INTO langchain4s_collections(uuid, name) -VALUES (?, ?) -ON CONFLICT DO NOTHING; - -deleteCollection: -DELETE FROM langchain4s_collections -WHERE uuid = ?; - -getCollection: -SELECT * FROM langchain4s_collections -WHERE name = ?; - -getCollectionById: -SELECT * FROM langchain4s_collections -WHERE uuid = ?; - -addNewDocument: -INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) -VALUES (?, ?, ?, ?); - -deleteCollectionDocs: -DELETE FROM langchain4s_embeddings -WHERE collection_id = ?; - -addNewText: -INSERT INTO langchain4s_embeddings(uuid, collection_id, embedding, content) -VALUES (?, ?, ?, ?); - -searchSimilarDocument: -SELECT content FROM langchain4s_embeddings -WHERE collection_id = ? -ORDER BY embedding || ?::vector -LIMIT ?; \ No newline at end of file diff --git a/src/commonTest/kotlin/com/xebia/functional/embeddings/Mock.kt b/src/commonTest/kotlin/com/xebia/functional/embeddings/Mock.kt new file mode 100644 index 000000000..d21954fa9 --- /dev/null +++ b/src/commonTest/kotlin/com/xebia/functional/embeddings/Mock.kt @@ -0,0 +1,23 @@ +package com.xebia.functional.embeddings + +import com.xebia.functional.llm.openai.RequestConfig + +fun Embeddings.Companion.mock( + embedDocuments: suspend (texts: List, chunkSize: Int?, config: RequestConfig) -> List = { _, _, _ -> + listOf(Embedding(listOf(1.0f, 2.0f, 3.0f)), Embedding(listOf(4.0f, 5.0f, 6.0f))) + }, + embedQuery: suspend (text: String, config: RequestConfig) -> List = { text, _ -> + when (text) { + "foo" -> listOf(Embedding(listOf(1.0f, 2.0f, 3.0f))) + "bar" -> listOf(Embedding(listOf(4.0f, 5.0f, 6.0f))) + "baz" -> listOf() + else -> listOf() + } + } +): Embeddings = object : Embeddings { + override suspend fun embedDocuments(texts: List, chunkSize: Int?, requestConfig: RequestConfig): List = + embedDocuments(texts, chunkSize, requestConfig) + + override suspend fun embedQuery(text: String, requestConfig: RequestConfig): List = + embedQuery(text, requestConfig) +} diff --git a/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt b/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt new file mode 100644 index 000000000..9beb5ee96 --- /dev/null +++ b/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt @@ -0,0 +1,108 @@ +package com.xebia.functional + +import arrow.core.raise.NullableRaise +import arrow.core.raise.nullable +import arrow.fx.coroutines.ResourceScope +import arrow.fx.coroutines.autoCloseable +import arrow.fx.coroutines.resourceScope +import java.sql.Connection +import java.sql.PreparedStatement +import java.sql.ResultSet +import java.sql.Types +import javax.sql.DataSource + +suspend fun DataSource.connection(block: suspend JDBCSyntax.() -> A): A = + resourceScope { + val conn = autoCloseable { connection } + JDBCSyntax(conn, this).block() + } + +class JDBCSyntax(conn: Connection, resourceScope: ResourceScope) : ResourceScope by resourceScope, Connection by conn { + + suspend fun prepareStatement( + sql: String, + binders: (SqlPreparedStatement.() -> Unit)? = null + ): PreparedStatement = autoCloseable { + prepareStatement(sql) + .apply { if (binders != null) SqlPreparedStatement(this).binders() } + } + + suspend fun update( + sql: String, + binders: (SqlPreparedStatement.() -> Unit)? = null, + ): Unit { + val statement = prepareStatement(sql, binders) + statement.executeUpdate() + } + + suspend fun queryOneOrNull( + sql: String, + binders: (SqlPreparedStatement.() -> Unit)? = null, + mapper: NullableSqlCursor.() -> A + ): A? { + val statement = prepareStatement(sql, binders) + val rs = autoCloseable { statement.executeQuery() } + return if (rs.next()) nullable { mapper(NullableSqlCursor(rs, this)) } + else null + } + + suspend fun queryAsList( + sql: String, + binders: (SqlPreparedStatement.() -> Unit)? = null, + mapper: NullableSqlCursor.() -> A? + ): List { + val statement = prepareStatement(sql, binders) + println(statement.toString()) + val rs = autoCloseable { statement.executeQuery() } + return buildList { + while (rs.next()) { + nullable { mapper(NullableSqlCursor(rs, this)) }?.let(::add) + } + } + } + + class SqlPreparedStatement(private val preparedStatement: PreparedStatement) { + private var index: Int = 1 + + fun bind(short: Short?): Unit = bind(short?.toLong()) + fun bind(byte: Byte?): Unit = bind(byte?.toLong()) + fun bind(int: Int?): Unit = bind(int?.toLong()) + fun bind(char: Char?): Unit = bind(char?.toString()) + + fun bind(bytes: ByteArray?): Unit = + if (bytes == null) preparedStatement.setNull(index++, Types.BLOB) + else preparedStatement.setBytes(index++, bytes) + + fun bind(long: Long?): Unit = + if (long == null) preparedStatement.setNull(index++, Types.INTEGER) + else preparedStatement.setLong(index++, long) + + fun bind(double: Double?): Unit = + if (double == null) preparedStatement.setNull(index++, Types.REAL) + else preparedStatement.setDouble(index++, double) + + fun bind(string: String?): Unit = + if (string == null) preparedStatement.setNull(index++, Types.VARCHAR) + else preparedStatement.setString(index++, string) + } + + class SqlCursor(private val resultSet: ResultSet) { + private var index: Int = 1 + fun int(): Int? = long()?.toInt() + fun string(): String? = resultSet.getString(index++) + fun bytes(): ByteArray? = resultSet.getBytes(index++) + fun long(): Long? = resultSet.getLong(index++).takeUnless { resultSet.wasNull() } + fun double(): Double? = resultSet.getDouble(index++).takeUnless { resultSet.wasNull() } + fun nextRow(): Boolean = resultSet.next() + } + + class NullableSqlCursor(private val resultSet: ResultSet, private val raise: NullableRaise) { + private var index: Int = 1 + fun int(): Int = long().toInt() + fun string(): String = raise.ensureNotNull(resultSet.getString(index++)) + fun bytes(): ByteArray = raise.ensureNotNull(resultSet.getBytes(index++)) + fun long(): Long = raise.ensureNotNull(resultSet.getLong(index++).takeUnless { resultSet.wasNull() }) + fun double(): Double = raise.ensureNotNull(resultSet.getDouble(index++).takeUnless { resultSet.wasNull() }) + fun nextRow(): Boolean = resultSet.next() + } +} diff --git a/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt b/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt deleted file mode 100644 index 865955839..000000000 --- a/src/jvmMain/kotlin/com/xebia/functional/JDBCVectorStore.kt +++ /dev/null @@ -1,27 +0,0 @@ -package com.xebia.functional - -import com.xebia.functional.embeddings.Embedding -import com.xebia.functional.vectorstores.DocumentVectorId -import com.xebia.functional.vectorstores.VectorStore -import javax.sql.DataSource - -class JDBCVectorStore private constructor( - val dataSource: DataSource -): VectorStore { - override fun addTexts(texts: List): List { - TODO("Not yet implemented") - } - - override fun addDocuments(documents: List): List { - TODO("Not yet implemented") - } - - override fun similaritySearch(query: String, limit: Int): List { - TODO("Not yet implemented") - } - - override fun similaritySearchByVector(embedding: Embedding, k: Int): List { - TODO("Not yet implemented") - } - -} \ No newline at end of file diff --git a/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt b/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt new file mode 100644 index 000000000..4a48bd639 --- /dev/null +++ b/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt @@ -0,0 +1,97 @@ +package com.xebia.functional + +import com.xebia.functional.embeddings.Embedding +import com.xebia.functional.embeddings.Embeddings +import com.xebia.functional.llm.openai.RequestConfig +import com.xebia.functional.vectorstores.DocumentVectorId +import com.xebia.functional.vectorstores.PGCollection +import com.xebia.functional.vectorstores.PGDistanceStrategy +import com.xebia.functional.vectorstores.VectorStore +import com.xebia.functional.vectorstores.addNewCollection +import com.xebia.functional.vectorstores.addNewText +import com.xebia.functional.vectorstores.addVectorExtension +import com.xebia.functional.vectorstores.createCollectionsTable +import com.xebia.functional.vectorstores.createEmbeddingTable +import com.xebia.functional.vectorstores.deleteCollection +import com.xebia.functional.vectorstores.deleteCollectionDocs +import com.xebia.functional.vectorstores.getCollection +import com.xebia.functional.vectorstores.searchSimilarDocument +import javax.sql.DataSource +import kotlinx.uuid.UUID +import kotlinx.uuid.generateUUID + +class PGVectorStore( + private val vectorSize: Int, + private val dataSource: DataSource, + private val embeddings: Embeddings, + private val collectionName: String, + private val distanceStrategy: PGDistanceStrategy, + private val preDeleteCollection: Boolean, + private val requestConfig: RequestConfig, + private val chunckSize: Int? +) : VectorStore { + + suspend fun JDBCSyntax.getCollection(collectionName: String): PGCollection = + queryOneOrNull(getCollection, + { bind(collectionName) } + ) { PGCollection(UUID(string()), string()) } + ?: throw IllegalStateException("Collection '$collectionName' not found") + + suspend fun JDBCSyntax.deleteCollection() { + if (preDeleteCollection) { + val collection = getCollection(collectionName) + update(deleteCollectionDocs) { bind(collection.uuid.toString()) } + update(deleteCollection) { bind(collection.uuid.toString()) } + } + } + + suspend fun initialDbSetup(): Unit = dataSource.connection { + update(addVectorExtension) + update(createCollectionsTable) + update(createEmbeddingTable(vectorSize)) + deleteCollection() + } + + suspend fun createCollection(): Unit = dataSource.connection { + val xa = UUID.generateUUID() + update(addNewCollection) { bind(xa.toString()); bind(collectionName) } + } + + override suspend fun addTexts(texts: List): List = dataSource.connection { + val embeddings = embeddings.embedDocuments(texts, chunckSize, requestConfig) + val collection = getCollection(collectionName) + texts.zip(embeddings) { text, embedding -> + val uuid = UUID.generateUUID() + update(addNewText) { + bind(uuid.toString()) + bind(collection.uuid.toString()) + bind(embedding.data.toString()) + bind(text) + } + DocumentVectorId(uuid) + } + } + + override suspend fun addDocuments(documents: List): List = + addTexts(documents.map(Document::content)) + + override suspend fun similaritySearch(query: String, limit: Int): List = dataSource.connection { + val embeddings = embeddings.embedQuery(query, requestConfig).ifEmpty { throw IllegalStateException("Embedding for text: '$query', has not been properly generated") } + val collection = getCollection(collectionName) + queryAsList(searchSimilarDocument(distanceStrategy), { + bind(collection.uuid.toString()) + bind(embeddings[0].data.toString()) + bind(limit) + }) { Document(string()) } + } + + override suspend fun similaritySearchByVector(embedding: Embedding, limit: Int): List = + dataSource.connection { + val collection = getCollection(collectionName) + queryAsList(searchSimilarDocument(distanceStrategy), { + bind(collection.uuid.toString()) + bind(embedding.data.toString()) + bind(limit) + }) { Document(string()) } + } +} diff --git a/src/jvmTest/kotlin/com/xebia/functional/PGVectorStoreSpec.kt b/src/jvmTest/kotlin/com/xebia/functional/PGVectorStoreSpec.kt new file mode 100644 index 000000000..96d72873c --- /dev/null +++ b/src/jvmTest/kotlin/com/xebia/functional/PGVectorStoreSpec.kt @@ -0,0 +1,91 @@ +package com.xebia.functional + +import com.xebia.functional.embeddings.Embedding +import com.xebia.functional.embeddings.Embeddings +import com.xebia.functional.embeddings.mock +import com.xebia.functional.llm.openai.EmbeddingModel +import com.xebia.functional.llm.openai.RequestConfig +import com.xebia.functional.vectorstores.PGDistanceStrategy +import com.zaxxer.hikari.HikariConfig +import com.zaxxer.hikari.HikariDataSource +import io.kotest.core.extensions.install +import io.kotest.core.spec.style.StringSpec +import io.kotest.extensions.testcontainers.SharedTestContainerExtension +import io.kotest.matchers.shouldBe +import org.junit.jupiter.api.assertThrows +import org.testcontainers.containers.PostgreSQLContainer +import org.testcontainers.utility.DockerImageName + +val postgres: PostgreSQLContainer = + PostgreSQLContainer(DockerImageName.parse("ankane/pgvector").asCompatibleSubstituteFor("postgres")) + +class PGVectorStoreSpec : StringSpec({ + + val container = install(SharedTestContainerExtension(postgres)) + val dataSource = autoClose(HikariDataSource(HikariConfig().apply { + jdbcUrl = container.jdbcUrl + username = container.username + password = container.password + driverClassName = "org.postgresql.Driver" + })) + + val pg = PGVectorStore( + vectorSize = 3, + dataSource = dataSource, + embeddings = Embeddings.mock(), + collectionName = "test_collection", + distanceStrategy = PGDistanceStrategy.Euclidean, + preDeleteCollection = false, + requestConfig = RequestConfig(EmbeddingModel.TextEmbeddingAda002, RequestConfig.Companion.User("user")), + chunckSize = null + ) + + "initialDbSetup should configure the DB properly" { + pg.initialDbSetup() + } + + "addTexts should fail with a CollectionNotFoundError if collection isn't present in the DB" { + assertThrows { + pg.addTexts(listOf("foo", "bar")) + }.message shouldBe "Collection 'test_collection' not found" + } + + "similaritySearch should fail with a CollectionNotFoundError if collection isn't present in the DB" { + assertThrows { + pg.similaritySearch("foo", 2) + }.message shouldBe "Collection 'test_collection' not found" + } + + "createCollection should create collection" { + pg.createCollection() + } + + "addTexts should return a list of 2 elements" { + pg.addTexts(listOf("foo", "bar")).size shouldBe 2 + } + + "similaritySearchByVector should return both documents" { + pg.similaritySearchByVector(Embedding(listOf(4.0f, 5.0f, 6.0f)), 2) shouldBe listOf( + Document("bar"), + Document("foo") + ) + } + + "addDocuments should return a list of 2 elements" { + pg.addDocuments(listOf(Document("foo"), Document("bar"))).size shouldBe 2 + } + + "similaritySearch should return 2 documents" { + pg.similaritySearch("foo", 2).size shouldBe 2 + } + + "similaritySearch should fail when embedding vector is empty" { + assertThrows { + pg.similaritySearch("baz", 2) + }.message shouldBe "Embedding for text: 'baz', has not been properly generated" + } + + "similaritySearchByVector should return document" { + pg.similaritySearchByVector(Embedding(listOf(1.0f, 2.0f, 3.0f)), 1) shouldBe listOf(Document("foo")) + } +}) From 9759fbc8c2513749845d35da446a521bad6070bc Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 15:07:27 +0200 Subject: [PATCH 4/9] Clean up --- build.gradle.kts | 6 +-- .../vectorstores/InMemoryVectorStore.kt | 54 ------------------- .../com/xebia/functional/config/getenv.kt | 2 - 3 files changed, 3 insertions(+), 59 deletions(-) delete mode 100644 src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt delete mode 100644 src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt diff --git a/build.gradle.kts b/build.gradle.kts index 09688fe2d..51a00b7bd 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -15,14 +15,14 @@ plugins { } java { - sourceCompatibility = JavaVersion.VERSION_17 - targetCompatibility = JavaVersion.VERSION_17 + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 } kotlin { jvm { compilations.all { - kotlinOptions.jvmTarget = JavaVersion.VERSION_17.majorVersion + kotlinOptions.jvmTarget = JavaVersion.VERSION_11.majorVersion } withJava() testRuns["test"].executionTask.configure { diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt deleted file mode 100644 index bd4787554..000000000 --- a/src/commonMain/kotlin/com/xebia/functional/vectorstores/InMemoryVectorStore.kt +++ /dev/null @@ -1,54 +0,0 @@ -//package com.xebia.functional.vectorstores -// -//import com.xebia.functional.Document -//import com.xebia.functional.embeddings.Embedding -//import kotlin.math.sqrt -// -//class InMemoryVectorStore : VectorStore { -// -// private val documents = mutableMapOf() -// -// override fun addTexts(texts: List): List { -// val documentVectors = texts.map { embeddings.embedText(it) } -// val ids = documentVectors.indices.map { it + 1 }.toList() -// documents.putAll(ids.zip(documentVectors).toMap()) -// return ids -// } -// -// override fun addDocuments(documents: List): List { -// val documentVectors = documents.map { embeddings.embedText(it.content) } -// val ids = documentVectors.indices.map { it + 1 }.toList() -// this.documents.putAll(ids.zip(documentVectors).toMap()) -// return ids -// } -// -// override fun similaritySearch(query: String, limit: Int): List { -// val queryVector = embeddings.embedText(query) -// val results = documents.toList().map { (id, vector) -> -// Pair(id, cosineSimilarity(vector, queryVector)) -// }.sortedByDescending { it.second }.take(limit) -// return results.map { Document(it.first.toString(), "") } -// } -// -// override fun similaritySearchByVector(embedding: Embedding, k: Int): List { -// val results = documents.map { (id, vector) -> -// Pair(id, cosineSimilarity(vector, embedding.data)) -// }.sortedByDescending { it.second }.take(k) -// return results.map { Document(it.first.toString(), "") } -// } -// -// private fun cosineSimilarity(v1: List, v2: List): Float { -// val freq1 = v1.groupingBy { it.id }.eachCount() -// val freq2 = v2.groupingBy { it.id }.eachCount() -// -// val dotProduct = freq1.filterKeys { freq2.containsKey(it) } -// .map { it.value * freq2.getValue(it.key) } -// .sum() -// -// val magnitude1 = sqrt(freq1.values.sumOf { it * it }.toDouble()).toFloat() -// val magnitude2 = sqrt(freq2.values.sumOf { it * it }.toDouble()).toFloat() -// -// return if (magnitude1 == 0f || magnitude2 == 0f) 0f -// else dotProduct / (magnitude1 * magnitude2) -// } -//} diff --git a/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt b/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt deleted file mode 100644 index 6201607a7..000000000 --- a/src/nativeMain/kotlin/com/xebia/functional/config/getenv.kt +++ /dev/null @@ -1,2 +0,0 @@ -package com.xebia.functional.config - From 22f747f3d1ae02f84b2ab270a5077bd2afb584a8 Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 15:10:38 +0200 Subject: [PATCH 5/9] Format queries --- .../xebia/functional/vectorstores/postgres.kt | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt index c95904a01..2b7fcd5f8 100644 --- a/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/postgres.kt @@ -8,55 +8,68 @@ enum class PGDistanceStrategy(val strategy: String) { Euclidean("<->"), InnerProduct("<#>"), CosineDistance("<=>") } -val createCollections = """CREATE TABLE langchain4k_collections ( +val createCollections: String = + """CREATE TABLE langchain4k_collections ( uuid TEXT PRIMARY KEY, name TEXT UNIQUE NOT NULL );""".trimIndent() -val createEmbeddings = """CREATE TABLE langchain4k_embeddings ( +val createEmbeddings: String = + """CREATE TABLE langchain4k_embeddings ( uuid TEXT PRIMARY KEY, collection_id TEXT REFERENCES langchain4k_collections(uuid), embedding BLOB, content TEXT );""".trimIndent() -val addVectorExtension = "CREATE EXTENSION IF NOT EXISTS vector;" +val addVectorExtension: String = + "CREATE EXTENSION IF NOT EXISTS vector;" -val createCollectionsTable = """CREATE TABLE IF NOT EXISTS langchain4k_collections ( +val createCollectionsTable: String = + """CREATE TABLE IF NOT EXISTS langchain4k_collections ( uuid TEXT PRIMARY KEY, name TEXT UNIQUE NOT NULL );""".trimIndent() -fun createEmbeddingTable(vectorSize: Int) = """CREATE TABLE IF NOT EXISTS langchain4k_embeddings ( +fun createEmbeddingTable(vectorSize: Int): String = + """CREATE TABLE IF NOT EXISTS langchain4k_embeddings ( uuid TEXT PRIMARY KEY, collection_id TEXT REFERENCES langchain4k_collections(uuid), embedding vector($vectorSize), content TEXT );""".trimIndent() -val addNewCollection = """INSERT INTO langchain4k_collections(uuid, name) +val addNewCollection: String = + """INSERT INTO langchain4k_collections(uuid, name) VALUES (?, ?) ON CONFLICT DO NOTHING;""".trimIndent() -val deleteCollection = """DELETE FROM langchain4k_collections +val deleteCollection: String = + """DELETE FROM langchain4k_collections WHERE uuid = ?;""".trimIndent() -val getCollection = """SELECT * FROM langchain4k_collections +val getCollection: String = + """SELECT * FROM langchain4k_collections WHERE name = ?;""".trimIndent() -val getCollectionById = """SELECT * FROM langchain4k_collections +val getCollectionById: String = + """SELECT * FROM langchain4k_collections WHERE uuid = ?;""".trimIndent() -val addNewDocument = """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) +val addNewDocument: String = + """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) VALUES (?, ?, ?, ?);""".trimIndent() -val deleteCollectionDocs = """DELETE FROM langchain4k_embeddings +val deleteCollectionDocs: String = + """DELETE FROM langchain4k_embeddings WHERE collection_id = ?;""".trimIndent() -val addNewText = """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) +val addNewText: String = + """INSERT INTO langchain4k_embeddings(uuid, collection_id, embedding, content) VALUES (?, ?, ?::vector, ?);""".trimIndent() -fun searchSimilarDocument(distance: PGDistanceStrategy) = """SELECT content FROM langchain4k_embeddings +fun searchSimilarDocument(distance: PGDistanceStrategy): String = + """SELECT content FROM langchain4k_embeddings WHERE collection_id = ? ORDER BY embedding ${distance.strategy} ?::vector From 8e0c807c4ee81ea4e41fee420eab53c6a9195bb4 Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 15:13:06 +0200 Subject: [PATCH 6/9] Remove single line using ; --- src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt b/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt index 4a48bd639..679c017e4 100644 --- a/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt +++ b/src/jvmMain/kotlin/com/xebia/functional/PGVectorStore.kt @@ -54,7 +54,10 @@ class PGVectorStore( suspend fun createCollection(): Unit = dataSource.connection { val xa = UUID.generateUUID() - update(addNewCollection) { bind(xa.toString()); bind(collectionName) } + update(addNewCollection) { + bind(xa.toString()) + bind(collectionName) + } } override suspend fun addTexts(texts: List): List = dataSource.connection { @@ -76,7 +79,8 @@ class PGVectorStore( addTexts(documents.map(Document::content)) override suspend fun similaritySearch(query: String, limit: Int): List = dataSource.connection { - val embeddings = embeddings.embedQuery(query, requestConfig).ifEmpty { throw IllegalStateException("Embedding for text: '$query', has not been properly generated") } + val embeddings = embeddings.embedQuery(query, requestConfig) + .ifEmpty { throw IllegalStateException("Embedding for text: '$query', has not been properly generated") } val collection = getCollection(collectionName) queryAsList(searchSimilarDocument(distanceStrategy), { bind(collection.uuid.toString()) From c9bff87cf9005ab37bb00d460b1d220f93a83f96 Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 17:14:31 +0200 Subject: [PATCH 7/9] Clean-up --- src/commonMain/kotlin/com/xebia/functional/model.kt | 2 +- .../kotlin/com/xebia/functional/config/ConfigSpec.kt | 4 ---- src/jsMain/kotlin/com/xebia/functional/config/getenv.kt | 2 -- src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt | 2 -- 4 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt delete mode 100644 src/jsMain/kotlin/com/xebia/functional/config/getenv.kt delete mode 100644 src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt diff --git a/src/commonMain/kotlin/com/xebia/functional/model.kt b/src/commonMain/kotlin/com/xebia/functional/model.kt index 532e9b5e0..3d8164f57 100644 --- a/src/commonMain/kotlin/com/xebia/functional/model.kt +++ b/src/commonMain/kotlin/com/xebia/functional/model.kt @@ -1,3 +1,3 @@ package com.xebia.functional - data class Document(val content: String) \ No newline at end of file +data class Document(val content: String) diff --git a/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt b/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt deleted file mode 100644 index 830353448..000000000 --- a/src/commonTest/kotlin/com/xebia/functional/config/ConfigSpec.kt +++ /dev/null @@ -1,4 +0,0 @@ -package com.xebia.functional.config - -class ConfigSpec { -} \ No newline at end of file diff --git a/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt b/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt deleted file mode 100644 index 6201607a7..000000000 --- a/src/jsMain/kotlin/com/xebia/functional/config/getenv.kt +++ /dev/null @@ -1,2 +0,0 @@ -package com.xebia.functional.config - diff --git a/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt b/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt deleted file mode 100644 index 6201607a7..000000000 --- a/src/jvmMain/kotlin/com/xebia/functional/config/getenv.kt +++ /dev/null @@ -1,2 +0,0 @@ -package com.xebia.functional.config - From 5d47147d585d8ae0b3fd2c32bd9c3b40724d3f01 Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 17:28:14 +0200 Subject: [PATCH 8/9] Apply feedback @franciscodr --- gradle/libs.versions.toml | 1 - .../com/xebia/functional/embeddings/OpenAIEmbeddings.kt | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index d611e920e..9c0b3e511 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -33,7 +33,6 @@ kotest-testcontainers = { module = "io.kotest.extensions:kotest-extensions-testc kotest-assertions-arrow = { module = "io.kotest.extensions:kotest-assertions-arrow", version.ref = "kotest-arrow" } uuid = { module = "app.softwork:kotlinx-uuid-core", version.ref = "uuid" } klogging = { module = "io.github.oshai:kotlin-logging", version.ref = "klogging" } - hikari = { module = "com.zaxxer:HikariCP", version.ref = "hikari" } postgresql = { module = "org.postgresql:postgresql", version.ref = "postgresql" } testcontainers-postgresql = { module = "org.testcontainers:postgresql", version.ref = "testcontainers" } diff --git a/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt index f80743281..63ababc01 100644 --- a/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt +++ b/src/commonMain/kotlin/com/xebia/functional/embeddings/OpenAIEmbeddings.kt @@ -33,10 +33,10 @@ class OpenAIEmbeddings( ): List = if (texts.isEmpty()) emptyList() else texts.chunked(chunkSize) - .parMap { withRetry(it, requestConfig) } + .parMap { createEmbeddingWithRetry(it, requestConfig) } .flatten() - private suspend fun withRetry(texts: List, requestConfig: RequestConfig): List = + private suspend fun createEmbeddingWithRetry(texts: List, requestConfig: RequestConfig): List = kotlin.runCatching { config.retryConfig.schedule() .log { retriesSoFar, _ -> logger.warn { "Open AI call failed. So far we have retried $retriesSoFar times." } } From 7af5f5a48cb0c7f5988a0349ee40dda9d59b72bb Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Thu, 27 Apr 2023 18:24:25 +0200 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Francisco Diaz --- .../kotlin/com/xebia/functional/vectorstores/VectorStore.kt | 2 +- src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt index cfd7861ec..3d0e4e29c 100644 --- a/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt +++ b/src/commonMain/kotlin/com/xebia/functional/vectorstores/VectorStore.kt @@ -38,7 +38,7 @@ interface VectorStore { * Return the docs most similar to the embedding * * @param embedding embedding vector to use to search for similar documents - * @param k number of documents to return + * @param limit number of documents to return * @return list of Documents most similar to the embedding */ suspend fun similaritySearchByVector(embedding: Embedding, limit: Int): List diff --git a/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt b/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt index 9beb5ee96..ae9aa157d 100644 --- a/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt +++ b/src/jvmMain/kotlin/com/xebia/functional/JDBCSyntax.kt @@ -52,7 +52,6 @@ class JDBCSyntax(conn: Connection, resourceScope: ResourceScope) : ResourceScope mapper: NullableSqlCursor.() -> A? ): List { val statement = prepareStatement(sql, binders) - println(statement.toString()) val rs = autoCloseable { statement.executeQuery() } return buildList { while (rs.next()) {