Skip to content

Commit

Permalink
Neo4j embedding store
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Simons <michael@simons.ac>
  • Loading branch information
jmartisk and michael-simons committed May 7, 2024
1 parent f34c6b6 commit 610ff76
Show file tree
Hide file tree
Showing 12 changed files with 646 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
** xref:pgvector-store.adoc[PgVector (PostgreSQL) Store]
** xref:in-process-embedding.adoc[In-Process Embeddings]
** xref:csv.adoc[Loading CSV files]
** xref:neo4j.adoc[Neo4j Store]
* Additional tools
** xref:easy-rag.adoc[Easy RAG]
Expand Down
171 changes: 171 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j-neo4j.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@

:summaryTableId: quarkus-langchain4j-neo4j
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-neo4j_configuration]]link:#quarkus-langchain4j-neo4j_configuration[Configuration property]

h|Type
h|Default

a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-dimension]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-dimension[quarkus.langchain4j.neo4j.dimension]`


[.description]
--
Dimension of the embeddings that will be stored in the Neo4j store.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_DIMENSION+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_DIMENSION+++`
endif::add-copy-button-to-env-var[]
--|int
|required icon:exclamation-circle[title=Configuration property is required]


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-label]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-label[quarkus.langchain4j.neo4j.label]`


[.description]
--
Label for the created nodes.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_LABEL+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_LABEL+++`
endif::add-copy-button-to-env-var[]
--|string
|`Document`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-embedding-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-embedding-property[quarkus.langchain4j.neo4j.embedding-property]`


[.description]
--
Name of the property to store the embedding vectors.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_EMBEDDING_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_EMBEDDING_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`embedding`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-id-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-id-property[quarkus.langchain4j.neo4j.id-property]`


[.description]
--
Name of the property to store embedding IDs.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_ID_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_ID_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`id`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-metadata-prefix]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-metadata-prefix[quarkus.langchain4j.neo4j.metadata-prefix]`


[.description]
--
Prefix to be added to the metadata keys. By default, no prefix is used.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_METADATA_PREFIX+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_METADATA_PREFIX+++`
endif::add-copy-button-to-env-var[]
--|string
|


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-text-property]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-text-property[quarkus.langchain4j.neo4j.text-property]`


[.description]
--
Name of the property to store the embedding text.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_TEXT_PROPERTY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_TEXT_PROPERTY+++`
endif::add-copy-button-to-env-var[]
--|string
|`text`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-index-name]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-index-name[quarkus.langchain4j.neo4j.index-name]`


[.description]
--
Name of the index to be created for vector search.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_INDEX_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_INDEX_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|`vector`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-database-name]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-database-name[quarkus.langchain4j.neo4j.database-name]`


[.description]
--
Name of the database to connect to.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_DATABASE_NAME+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_DATABASE_NAME+++`
endif::add-copy-button-to-env-var[]
--|string
|`neo4j`


a| [[quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-retrieval-query]]`link:#quarkus-langchain4j-neo4j_quarkus-langchain4j-neo4j-retrieval-query[quarkus.langchain4j.neo4j.retrieval-query]`


[.description]
--
The query to use when retrieving embeddings. This query has to return the following columns:

- metadata
- score
- column of the same name as the 'id-property' value
- column of the same name as the 'text-property' value
- column of the same name as the 'embedding-property' value

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_NEO4J_RETRIEVAL_QUERY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_NEO4J_RETRIEVAL_QUERY+++`
endif::add-copy-button-to-env-var[]
--|string
|`RETURN properties(node) AS metadata, node.${quarkus.langchain4j.neo4j.id-property} AS ${quarkus.langchain4j.neo4j.id-property}, node.${quarkus.langchain4j.neo4j.text-property} AS ${quarkus.langchain4j.neo4j.text-property}, node.${quarkus.langchain4j.neo4j.embedding-property} AS ${quarkus.langchain4j.neo4j.embedding-property}, score`

|===
38 changes: 38 additions & 0 deletions docs/modules/ROOT/pages/neo4j.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
= Neo4j Store for Retrieval Augmented Generation (RAG)

include::./includes/attributes.adoc[]

When implementing Retrieval Augmented Generation (RAG), a robust document store is crucial. This guide demonstrates how to leverage a https://neo4j.com/[Neo4j] database as the document store.

IMPORTANT: Neo4j version 5.x or later is required (to support vector search).

== Leveraging the Neo4j embedding store

To make use of the Neo4j embedding store, you'll need to include the following dependency:

[source,xml,subs=attributes+]
----
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j</artifactId>
</dependency>
----

The `quarkus-langchain4j-neo4j` extension depends on another Quarkiverse
extension, https://github.com/quarkiverse/quarkus-neo4j[quarkus-neo4j],
which provides the Neo4j client capabilities and also Dev Services support.
All configuration from the `quarkus-neo4j` extension is thus applicable when
using a Neo4j database as the document store. See
https://docs.quarkiverse.io/quarkus-neo4j/dev/index.html[quarkus-neo4j
documentation] for more information.

NOTE: To get started, only one configuration property is required to be
set - `quarkus.langchain4j.neo4j.dimension`, which specifies the dimension
of the embeddings that you're going to store and depends on the embedding
model.

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:

include::includes/quarkus-langchain4j-neo4j.adoc[leveloffset=+1,opts=optional]
14 changes: 14 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,19 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j-deployment</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -312,6 +325,7 @@
<include>quarkus-langchain4j-bam.adoc</include>
<include>quarkus-langchain4j-watsonx.adoc</include>
<include>quarkus-langchain4j-mistralai.adoc</include>
<include>quarkus-langchain4j-neo4j.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
83 changes: 83 additions & 0 deletions neo4j/deployment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Deployment module of the Quarkus LangChain4j Neo4j embedding store extension. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j-parent</artifactId>
<version>999-SNAPSHOT</version>
</parent>
<artifactId>quarkus-langchain4j-neo4j-deployment</artifactId>
<name>Quarkus LangChain4j - Neo4j embedding store - Deployment</name>
<dependencies>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc-deployment</artifactId>
</dependency>
<!-- Deployment artifact of the Quarkiverse Neo4j extension; its version is taken from the quarkus-neo4j.version property. -->
<dependency>
<groupId>io.quarkiverse.neo4j</groupId>
<artifactId>quarkus-neo4j-deployment</artifactId>
<version>${quarkus-neo4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-neo4j</artifactId>
</dependency>
<!-- Presumably the runtime module matching this deployment module (same project version); confirm against the parent POM. -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-neo4j</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-core-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Test-scoped dependencies start here. -->
<!-- The "tests" classifier / test-jar of langchain4j-core, available in test scope only. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
<classifier>tests</classifier>
<type>test-jar</type>
<scope>test</scope>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5-internal</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<!-- Embedding model artifact used in test scope only; its transitive langchain4j-core dependency is excluded. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>${langchain4j-embeddings.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Puts quarkus-extension-processor on the compiler's annotation processor path. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-extension-processor</artifactId>
<version>${quarkus.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package io.quarkiverse.langchain4j.neo4j;

import jakarta.enterprise.context.ApplicationScoped;

import org.jboss.jandex.ClassType;
import org.jboss.jandex.DotName;
import org.jboss.jandex.ParameterizedType;
import org.neo4j.driver.Driver;

import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.neo4j.Neo4jEmbeddingStore;
import io.quarkiverse.langchain4j.deployment.EmbeddingStoreBuildItem;
import io.quarkiverse.langchain4j.neo4j.runtime.Neo4jEmbeddingStoreRecorder;
import io.quarkiverse.langchain4j.neo4j.runtime.Neo4jRuntimeConfig;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.arc.deployment.UnremovableBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;

/**
 * Build-time processor of the LangChain4j Neo4j embedding store extension.
 *
 * <p>
 * It registers the extension feature name and contributes a synthetic
 * {@link Neo4jEmbeddingStore} bean that is created at runtime from the
 * {@link Neo4jRuntimeConfig} by the {@link Neo4jEmbeddingStoreRecorder}.
 */
public class Neo4jEmbeddingStoreProcessor {

    private static final String FEATURE = "langchain4j-neo4j";
    private static final DotName NEO4J_EMBEDDING_STORE = DotName.createSimple(Neo4jEmbeddingStore.class);

    /**
     * Registers the feature name of this extension.
     *
     * @return the feature build item carrying {@code "langchain4j-neo4j"}
     */
    @BuildStep
    FeatureBuildItem feature() {
        return new FeatureBuildItem(FEATURE);
    }

    /**
     * Produces the synthetic embedding store bean and the build items that accompany it.
     *
     * <p>
     * The bean is exposed under the types {@link EmbeddingStore}, {@link Neo4jEmbeddingStore} and
     * {@code EmbeddingStore<TextSegment>}, is application scoped, is flagged as a default bean and
     * is initialized at runtime. It declares an injection point for the Neo4j {@link Driver}.
     *
     * @param beanProducer producer receiving the synthetic embedding store bean
     * @param recorder recorder supplying the function that creates the store
     * @param config runtime configuration handed to the recorder
     * @param unremovableProducer producer used to mark {@link Driver} beans as unremovable
     * @param embeddingStoreProducer producer receiving the {@link EmbeddingStoreBuildItem}
     */
    @BuildStep
    @Record(ExecutionTime.RUNTIME_INIT)
    public void createBean(
            BuildProducer<SyntheticBeanBuildItem> beanProducer,
            Neo4jEmbeddingStoreRecorder recorder,
            Neo4jRuntimeConfig config,
            BuildProducer<UnremovableBeanBuildItem> unremovableProducer,
            BuildProducer<EmbeddingStoreBuildItem> embeddingStoreProducer) {
        // The Driver is only referenced through the synthetic bean's injection point below,
        // so it is explicitly marked unremovable.
        unremovableProducer.produce(UnremovableBeanBuildItem.beanTypes(Driver.class));

        beanProducer.produce(SyntheticBeanBuildItem
                .configure(NEO4J_EMBEDDING_STORE)
                .types(
                        ClassType.create(EmbeddingStore.class),
                        ClassType.create(NEO4J_EMBEDDING_STORE),
                        ParameterizedType.create(EmbeddingStore.class, ClassType.create(TextSegment.class)))
                // Flag the bean as default exactly once; the original chain repeated this call.
                .defaultBean()
                .setRuntimeInit()
                .scope(ApplicationScoped.class)
                .addInjectionPoint(ClassType.create(Driver.class))
                .createWith(recorder.embeddingStoreFunction(config))
                .done());

        embeddingStoreProducer.produce(new EmbeddingStoreBuildItem());
    }
}
Loading

0 comments on commit 610ff76

Please sign in to comment.