diff --git a/examples/rag-spring-article/pom.xml b/examples/rag-spring-article/pom.xml
index 3bf33b87d..68b4b4126 100644
--- a/examples/rag-spring-article/pom.xml
+++ b/examples/rag-spring-article/pom.xml
@@ -39,27 +39,29 @@
org.springframework.ai
spring-ai-bom
- 1.0.0-SNAPSHOT
+ 1.0.0-M3
pom
import
+
org.springframework.ai
- spring-ai-elasticsearch-store
+ spring-ai-spring-boot-autoconfigure
1.0.0-SNAPSHOT
+
- org.apache.tika
- tika-core
- 2.9.2
+ org.springframework.ai
+ spring-ai-elasticsearch-store
+ 1.0.0-SNAPSHOT
- org.apache.tika
- tika-parser-pdf-module
- 2.9.2
+ org.springframework.ai
+ spring-ai-pdf-document-reader
+ 1.0.0-SNAPSHOT
diff --git a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/Config.java b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/Config.java
deleted file mode 100644
index 1568c9dbc..000000000
--- a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/Config.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to Elasticsearch B.V. under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch B.V. licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package co.elastic.clients.rag.article;
-
-import org.apache.http.Header;
-import org.apache.http.HttpHost;
-import org.apache.http.message.BasicHeader;
-import org.elasticsearch.client.RestClient;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.embedding.EmbeddingModel;
-import org.springframework.ai.openai.OpenAiChatModel;
-import org.springframework.ai.openai.OpenAiEmbeddingModel;
-import org.springframework.ai.openai.api.OpenAiApi;
-import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
-import org.springframework.ai.vectorstore.ElasticsearchVectorStoreOptions;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-@Configuration
-public class Config {
-
- @Bean
- public ElasticsearchVectorStore vectorStoreDefault(EmbeddingModel embeddingModel, RestClient restClient) {
- ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions();
- return new ElasticsearchVectorStore(options,restClient, embeddingModel, true);
- }
-
- @Bean
- public EmbeddingModel embeddingModel() {
- return new OpenAiEmbeddingModel(new OpenAiApi(System.getenv("OPENAI_API_KEY")));
- }
-
- @Bean
- public ChatModel chatModel() {
- return new OpenAiChatModel(new OpenAiApi(System.getenv("OPENAI_API_KEY")));
- }
-
- @Bean
- RestClient restClient() {
-
- return RestClient
- .builder(HttpHost.create(System.getenv("ES_SERVER_URL")))
- .setDefaultHeaders(new Header[]{
- new BasicHeader("Authorization", "ApiKey " + System.getenv("ES_API_KEY"))
- })
- .build();
- }
-
-}
diff --git a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/PageContentHandler.java b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/PageContentHandler.java
deleted file mode 100644
index 0fb58b0ba..000000000
--- a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/PageContentHandler.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package co.elastic.clients.rag.article;
-
-import org.apache.tika.sax.ToTextContentHandler;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-// taken from:
-// https://github.com/mkalus/tika-page-extractor/blob/master/src/main/java/de/auxnet/PageContentHandler.java
-public class PageContentHandler extends ToTextContentHandler {
- /**
- * logger
- */
- private static final Logger logger = LoggerFactory.getLogger(PageContentHandler.class);
-
- final static private String pageTag = "div";
- final static private String pageClass = "page";
-
- /**
- * StringBuilder of current page
- */
- private StringBuilder builder;
-
- /**
- * page counter
- */
- private int pageNumber = 0;
-
- /**
- * page map - setting the initial capacity to 500 will enhance speed by a tiny bit up to 500 bits, but will require
- * more RAM
- */
- private Map pages = new HashMap<>(500);
-
- /**
- * flag telling to compress text information by stripping whitespace?
- */
- private final boolean compress;
-
- /**
- * Default constructor
- */
- public PageContentHandler() {
- this.compress = true;
- }
-
- /**
- * Constructor
- *
- * @param compress text information by stripping whitespace?
- */
- public PageContentHandler(boolean compress) {
- this.compress = compress;
- }
-
- @Override
- public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
- if (pageTag.endsWith(qName) && pageClass.equals(atts.getValue("class")))
- startPage();
- }
-
- @Override
- public void endElement(String uri, String localName, String qName) throws SAXException {
- if (pageTag.endsWith(qName))
- endPage();
- }
-
- @Override
- public void characters(char[] ch, int start, int length) throws SAXException {
- // append data
- if (length > 0 && builder != null) {
- builder.append(ch);
- }
- }
-
- protected void startPage() throws SAXException {
- builder = new StringBuilder();
- pageNumber++;
- if (logger.isDebugEnabled())
- logger.debug("Page: " + pageNumber);
- }
-
- protected void endPage() throws SAXException {
- String page = builder.toString();
- builder = new StringBuilder();
-
- // if compression has been turned on, compact whitespace and trim string
- if (compress)
- page = page.replaceAll("\\s+", " ").trim();
-
- // page number already exists?
- if (pages.containsKey(pageNumber)) {
- if (page.isEmpty()) return; // do not add empty pages to map
-
- page = pages.get(pageNumber) + " " + page; // concatenate pages
- page = page.trim();
- }
-
- // add to page list
- pages.put(pageNumber, page);
- }
-
- /**
- * @return all extracted pages
- */
- public List getPages() {
- List pagesReal = new ArrayList<>(pageNumber);
-
- // convert to list
- for (int i = 1; i <= pageNumber; i++) {
- String page = pages.get(i);
- if (page == null) page = "";
-
- pagesReal.add(page);
- }
-
- if (logger.isDebugEnabled())
- logger.debug("Returning " + pageNumber + " page(s).");
-
- return pagesReal;
- }
-}
diff --git a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java
index 73e7a9912..50a76b7c8 100644
--- a/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java
+++ b/examples/rag-spring-article/src/main/java/co/elastic/clients/rag/article/RagService.java
@@ -18,79 +18,34 @@
*/
package co.elastic.clients.rag.article;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.pdf.PDFParserConfig;
-import org.springframework.ai.chat.messages.Message;
-import org.springframework.ai.chat.messages.UserMessage;
-import org.springframework.ai.chat.model.ChatModel;
-import org.springframework.ai.chat.model.ChatResponse;
-import org.springframework.ai.chat.prompt.Prompt;
-import org.springframework.ai.chat.prompt.SystemPromptTemplate;
+import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
+import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.ElasticsearchVectorStore;
import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
-import org.xml.sax.SAXException;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.stream.Collectors;
@Service
public class RagService {
+ // Both are provided by Spring's default auto-configuration through constructor injection
private ElasticsearchVectorStore vectorStore;
- private ChatModel chatModel;
+ private ChatClient chatClient;
- @Autowired
- public RagService(ElasticsearchVectorStore vectorStore, ChatModel model) {
+ public RagService(ElasticsearchVectorStore vectorStore, ChatClient.Builder clientBuilder) {
this.vectorStore = vectorStore;
- this.chatModel = model;
+ this.chatClient = clientBuilder.build();
}
- public void ingestPDF(String path) throws IOException, TikaException, SAXException {
- // Initializing the PDF parser
- // Keep in mind that AutoDetectParser is not thread safe
- Parser parser = new AutoDetectParser();
- // Using our custom single page handler class
- PageContentHandler handler = new PageContentHandler();
+ public void ingestPDF(String path) {
- // No need for any other specific PDF configuration
- ParseContext parseContext = new ParseContext();
- parseContext.set(PDFParserConfig.class, new PDFParserConfig());
-
- // The metadata contain information such as creation date, creation tool used, etc... which we
- // don't need
- Metadata metadata = new Metadata();
-
- // Reading the file
- try (FileInputStream stream = new FileInputStream(path)) {
- parser.parse(stream, handler, metadata, parseContext);
- }
-
- // Getting the result as a list of Strings with the content of the pages
- List allPages = handler.getPages();
- List docbatch = new ArrayList<>();
-
- // Converting pages to Documents
- for (int i = 0; i < allPages.size(); i++) {
- Map docMetadata = new HashMap<>();
- // The page number will be used in the response
- docMetadata.put("page", i + 1);
-
- Document doc = new Document(allPages.get(i), docMetadata);
- docbatch.add(doc);
- }
+ // Spring AI utility class to read a PDF file page by page
+ PagePdfDocumentReader pdfReader = new PagePdfDocumentReader(path);
+ List docbatch = pdfReader.read();
// Sending batch of documents to vector store
// applying tokenizer
@@ -109,31 +64,31 @@ public String queryLLM(String question) {
.map(Document::getContent)
.collect(Collectors.joining(System.lineSeparator()));
- // Setting the prompt
- String basePrompt = """
+ // Setting the prompt with the context
+ String prompt = """
You're assisting with providing the rules of the tabletop game Runewars.
- Use the information from the DOCUMENTS section to provide accurate answers.
+ Use the information from the DOCUMENTS section to provide accurate answers to the
+ question in the QUESTION section.
If unsure, simply state that you don't know.
DOCUMENTS:
- {documents}
- """;
-
- // Preparing the question for the LLM
- SystemPromptTemplate systemPromptTemplate = new SystemPromptTemplate(basePrompt);
- Message systemMessage = systemPromptTemplate.createMessage(Map.of("documents", documents));
+ """ + documents
+ + """
+ QUESTION:
+ """ + question;
- UserMessage userMessage = new UserMessage(question);
- Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
// Calling the chat model with the question
- ChatResponse response = chatModel.call(prompt);
+ String response = chatClient.prompt()
+ .user(prompt)
+ .call()
+ .content();
- return response.getResult().getOutput().getContent() +
+ return response +
System.lineSeparator() +
"Found at page: " +
// Retrieving the first ranked page number from the document metadata
- vectorStoreResult.get(0).getMetadata().get("page") +
+ vectorStoreResult.get(0).getMetadata().get(PagePdfDocumentReader.METADATA_START_PAGE_NUMBER) +
" of the manual";
}
}
diff --git a/examples/rag-spring-article/src/main/resources/application.properties b/examples/rag-spring-article/src/main/resources/application.properties
index 2b7cf4008..ffc9d4b5c 100644
--- a/examples/rag-spring-article/src/main/resources/application.properties
+++ b/examples/rag-spring-article/src/main/resources/application.properties
@@ -1 +1,9 @@
spring.application.name=rag
+
+spring.ai.openai.api-key=${OPENAI_API_KEY}
+spring.ai.chat.client.enabled=true
+
+spring.elasticsearch.uris=${ES_SERVER_URL}
+spring.elasticsearch.username=${ES_USERNAME}
+spring.elasticsearch.password=${ES_PASSWORD}
+spring.ai.vectorstore.elasticsearch.initialize-schema=true