Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neo4j embedding store #548

Merged
merged 1 commit into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
** xref:pgvector-store.adoc[PgVector (PostgreSQL) Store]
** xref:in-process-embedding.adoc[In-Process Embeddings]
** xref:csv.adoc[Loading CSV files]
** xref:neo4j.adoc[Neo4j Store]

* Additional tools
** xref:easy-rag.adoc[Easy RAG]
Expand Down
171 changes: 171 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j-neo4j.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@

:summaryTableId: quarkus-langchain4j-neo4j
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-neo4j_configuration]]link:#quarkus-langchain4j-neo4j_configuration[Configuration property]

h|Type
h|Default

a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-dimension]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-dimension[quarkus.langchain4j.neo4j.dimension]`


[.description]
--
Dimension of the embeddings that will be stored in the Neo4j store.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_DIMENSION+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_DIMENSION+++`
endif::add-copy-button-to-env-var[]
--|int
|required icon:exclamation-circle[title=Configuration property is required]


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-label]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-label[quarkus.langchain4j.neo4j.label]`


[.description]
--
Label for the created nodes.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_LABEL+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_LABEL+++`
endif::add-copy-button-to-env-var[]
--|string
|`Document`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-embedding-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-embedding-property[quarkus.langchain4j.neo4j.embedding-property]`


[.description]
--
Name of the property to store the embedding vectors.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_EMBEDDING_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_EMBEDDING_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`embedding`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-id-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-id-property[quarkus.langchain4j.neo4j.id-property]`


[.description]
--
Name of the property to store embedding IDs.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_ID_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_ID_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`id`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-metadata-prefix]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-metadata-prefix[quarkus.langchain4j.neo4j.metadata-prefix]`


[.description]
--
Prefix to be added to the metadata keys. By default, no prefix is used.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_METADATA_PREFIX+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_METADATA_PREFIX+++`
endif::add-copy-button-to-env-var[]
--|string
|


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-text-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-text-property[quarkus.langchain4j.neo4j.text-property]`


[.description]
--
Name of the property to store the embedding text.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_TEXT_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_TEXT_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`text`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-index-name]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-index-name[quarkus.langchain4j.neo4j.index-name]`


[.description]
--
Name of the index to be created for vector search.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_INDEX_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_INDEX_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|`vector`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-database-name]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-database-name[quarkus.langchain4j.neo4j.database-name]`


[.description]
--
Name of the database to connect to.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_DATABASE_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_DATABASE_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|`neo4j`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-retrieval-query]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-retrieval-query[quarkus.langchain4j.neo4j.retrieval-query]`


[.description]
--
The query to use when retrieving embeddings. This query has to return the following columns:

- metadata
- score
- column of the same name as the 'id-property' value
- column of the same name as the 'text-property' value
- column of the same name as the 'embedding-property' value

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_RETRIEVAL_QUERY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_RETRIEVAL_QUERY+++`
endif::add-copy-button-to-env-var[]
--|string
|`RETURN properties(node) AS metadata, node.${quarkus.langchain4j.neo4j.id-property} AS ${quarkus.langchain4j.neo4j.id-property}, node.${quarkus.langchain4j.neo4j.text-property} AS ${quarkus.langchain4j.neo4j.text-property}, node.${quarkus.langchain4j.neo4j.embedding-property} AS ${quarkus.langchain4j.neo4j.embedding-property}, score`

|===
38 changes: 38 additions & 0 deletions docs/modules/ROOT/pages/neo4j.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
= Neo4j Store for Retrieval Augmented Generation (RAG)

include::./includes/attributes.adoc[]

When implementing Retrieval Augmented Generation (RAG), a robust document store is crucial. This guide demonstrates how to leverage a https://neo4j.com/[Neo4j] database as the document store.

IMPORTANT: Neo4j version 5.x or later is required (to support vector search).

== Leveraging the Neo4j embedding store

To make use of the Neo4j embedding store, you'll need to include the following dependency:

[source,xml,subs=attributes+]
----
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j</artifactId>
</dependency>
----

The `quarkus-langchain4j-neo4j` extension depends on another Quarkiverse
extension, https://github.com/quarkiverse/quarkus-neo4j[quarkus-neo4j],
which provides the Neo4j client capabilities and also Dev Services support.
All configuration from the `quarkus-neo4j` extension is thus applicable when
using a Neo4j database as the document store. See
https://docs.quarkiverse.io/quarkus-neo4j/dev/index.html[quarkus-neo4j
documentation] for more information.

NOTE: To get started, only one configuration property is required to be
set - `quarkus.langchain4j.neo4j.dimension`, which specifies the dimension
of the embeddings that you're going to store and depends on the embedding
model.

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:

include::includes/quarkus-langchain4j-neo4j.adoc[leveloffset=+1,opts=optional]
14 changes: 14 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,19 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j-deployment</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -312,6 +325,7 @@
<include>quarkus-langchain4j-bam.adoc</include>
<include>quarkus-langchain4j-watsonx.adoc</include>
<include>quarkus-langchain4j-mistralai.adoc</include>
<include>quarkus-langchain4j-neo4j.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
83 changes: 83 additions & 0 deletions neo4j/deployment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j-parent</artifactId>
<version>999-SNAPSHOT</version>
</parent>
<artifactId>quarkus-langchain4j-neo4j-deployment</artifactId>
<name>Quarkus LangChain4j - Neo4j embedding store - Deployment</name>
<dependencies>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkiverse.neo4j</groupId>
<artifactId>quarkus-neo4j-deployment</artifactId>
<version>${quarkus-neo4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-neo4j</artifactId>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-core-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5-internal</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>${langchain4j-embeddings.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-extension-processor</artifactId>
<version>${quarkus.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package io.quarkiverse.langchain4j.neo4j;

import jakarta.enterprise.context.ApplicationScoped;

import org.jboss.jandex.ClassType;
import org.jboss.jandex.DotName;
import org.jboss.jandex.ParameterizedType;
import org.neo4j.driver.Driver;

import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.neo4j.Neo4jEmbeddingStore;
import io.quarkiverse.langchain4j.deployment.EmbeddingStoreBuildItem;
import io.quarkiverse.langchain4j.neo4j.runtime.Neo4jEmbeddingStoreRecorder;
import io.quarkiverse.langchain4j.neo4j.runtime.Neo4jRuntimeConfig;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.arc.deployment.UnremovableBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;

/**
 * Quarkus build-time processor for the Neo4j embedding store extension.
 *
 * <p>It registers the {@code langchain4j-neo4j} feature and produces a synthetic
 * {@link Neo4jEmbeddingStore} bean whose instance is supplied at runtime by
 * {@link Neo4jEmbeddingStoreRecorder}.
 */
public class Neo4jEmbeddingStoreProcessor {

    private static final String FEATURE = "langchain4j-neo4j";
    private static final DotName NEO4J_EMBEDDING_STORE = DotName.createSimple(Neo4jEmbeddingStore.class);

    /**
     * Declares the extension feature.
     *
     * @return the build item carrying the {@code langchain4j-neo4j} feature name
     */
    @BuildStep
    FeatureBuildItem feature() {
        return new FeatureBuildItem(FEATURE);
    }

    /**
     * Produces the synthetic embedding store bean and signals that an embedding store is available.
     *
     * <p>The bean is an application-scoped default bean, initialized at runtime, and exposed under
     * the raw {@link EmbeddingStore} type, the concrete {@link Neo4jEmbeddingStore} type and the
     * parameterized {@code EmbeddingStore<TextSegment>} type.
     *
     * @param beanProducer receives the synthetic embedding store bean definition
     * @param recorder recorder that provides the function creating the store instance
     * @param config runtime configuration handed to the recorder
     * @param unremovableProducer receives the marker that keeps the Neo4j {@link Driver} bean
     * @param embeddingStoreProducer receives the {@link EmbeddingStoreBuildItem} marker
     */
    @BuildStep
    @Record(ExecutionTime.RUNTIME_INIT)
    public void createBean(
            BuildProducer<SyntheticBeanBuildItem> beanProducer,
            Neo4jEmbeddingStoreRecorder recorder,
            Neo4jRuntimeConfig config,
            BuildProducer<UnremovableBeanBuildItem> unremovableProducer,
            BuildProducer<EmbeddingStoreBuildItem> embeddingStoreProducer) {
        // The Driver is declared as an injection point of the synthetic bean below.
        // NOTE(review): presumably it is marked unremovable so that ArC does not prune it when
        // the application itself never injects a Driver - confirm.
        unremovableProducer.produce(UnremovableBeanBuildItem.beanTypes(Driver.class));

        beanProducer.produce(SyntheticBeanBuildItem
                .configure(NEO4J_EMBEDDING_STORE)
                .types(
                        ClassType.create(EmbeddingStore.class),
                        ClassType.create(NEO4J_EMBEDDING_STORE),
                        ParameterizedType.create(EmbeddingStore.class, ClassType.create(TextSegment.class)))
                .setRuntimeInit()
                // Declared once; the original chain invoked defaultBean() twice.
                .defaultBean()
                .scope(ApplicationScoped.class)
                .addInjectionPoint(ClassType.create(Driver.class))
                .createWith(recorder.embeddingStoreFunction(config))
                .done());

        embeddingStoreProducer.produce(new EmbeddingStoreBuildItem());
    }
}
Loading
Loading