diff --git a/api/src/libs/composedb.js b/api/src/libs/composedb.js index 033686fa..b6af93f7 100644 --- a/api/src/libs/composedb.js +++ b/api/src/libs/composedb.js @@ -218,6 +218,15 @@ let defaultRuntime = { property: "indexId", }, }, + embeddings: { + type: "view", + viewType: "relation", + relation: { + source: "queryConnection", + model: "Model_Embedding_ID", + property: "indexId", + }, + }, }, IndexItem: { itemId: { @@ -271,6 +280,15 @@ let defaultRuntime = { }, }, controllerDID: { type: "view", viewType: "documentAccount" }, + embeddings: { + type: "view", + viewType: "relation", + relation: { + source: "queryConnection", + model: "Model_Embedding_ID", + property: "itemId", + }, + }, }, Profile: { bio: { type: "string", required: false, immutable: false }, diff --git a/api/src/libs/indexer.js b/api/src/libs/indexer.js index c27e8f83..df7be549 100644 --- a/api/src/libs/indexer.js +++ b/api/src/libs/indexer.js @@ -45,6 +45,12 @@ class Indexer { const itemService = new ItemService(this.definition); const indexItem = await itemService.getIndexItemById(id, false); + if (indexItem.index.controllerDID.id !== indexItem.controllerDID.id) { + logger.warn( + `Step [0]: IndexItem is unauthorized to index: ${JSON.stringify(indexItem)}`, + ); + return; + } logger.info( `Step [0]: IndexItem found for id: ${JSON.stringify(indexItem)}`, ); @@ -121,6 +127,13 @@ class Indexer { const itemService = new ItemService(this.definition); const indexItem = await itemService.getIndexItemById(id, false); + if (indexItem.index.controllerDID.id !== indexItem.controllerDID.id) { + logger.warn( + `Step [0]: IndexItem is unauthorized to index: ${JSON.stringify(indexItem)}`, + ); + return; + } + try { const indexSession = await this.getIndexerSession(indexItem.index); await indexSession.did.authenticate(); @@ -305,6 +318,13 @@ class Indexer { const embedding = await embeddingService.getEmbeddingById(id); const itemStream = await ceramic.loadStream(embedding.item.id); + if 
(embedding.index.controllerDID.id !== embedding.controllerDID.id) { + logger.warn( + `Step [0]: Embedding is unauthorized to index: ${JSON.stringify(embedding)}`, + ); + return; + } + const doc = { ...itemStream.content, id: itemStream.id.toString(), @@ -317,7 +337,7 @@ class Indexer { indexCreatedAt: embedding.index.createdAt, indexUpdatedAt: embedding.index.updatedAt, indexDeletedAt: embedding.index.deletedAt, - indexOwnerDID: embedding.index.ownerDID.id, + indexControllerDID: embedding.index.controllerDID.id, vector: embedding.vector, }; @@ -325,11 +345,11 @@ class Indexer { payload[key] = doc[key]; } - if (embedding.index.ownerDID.name) { - payload.indexOwnerName = embedding.index.ownerDID.name; + if (embedding.index.controllerDID.name) { + payload.indexOwnerName = embedding.index.controllerDID.name; } - if (embedding.index.ownerDID.bio) { - payload.indexOwnerBio = embedding.index.ownerDID.bio; + if (embedding.index.controllerDID.bio) { + payload.indexOwnerBio = embedding.index.controllerDID.bio; } try { diff --git a/api/src/libs/lit/index.js b/api/src/libs/lit/index.js index 1f2acac4..c1adc289 100644 --- a/api/src/libs/lit/index.js +++ b/api/src/libs/lit/index.js @@ -625,7 +625,7 @@ export const getPKPSessionWithLIT = async (session, index) => { export const getRolesFromSession = (index, session, definition) => { if ( session.cacao.p.resources.indexOf("ceramic://*") > -1 && - index.ownerDID.id == session.did.parent + index.controllerDID.id == session.did.parent ) { return { owner: true, diff --git a/api/src/services/embedding.js b/api/src/services/embedding.js index d74db51a..ae288430 100644 --- a/api/src/services/embedding.js +++ b/api/src/services/embedding.js @@ -10,6 +10,7 @@ export class EmbeddingService { definition, }); this.did = null; + this.indexService = new IndexService(definition); } setSession(session) { @@ -64,7 +65,6 @@ export class EmbeddingService { if (!data || !data.node) { throw new Error("Invalid response data"); } - 
data.node.index.ownerDID = data.node.index.controllerDID; return data.node; } catch (error) { @@ -97,6 +97,22 @@ export class EmbeddingService { context vector description + item { + id + __typename + } + index { + id + title + controllerDID { + id + } + signerPublicKey + signerFunction + createdAt + updatedAt + deletedAt + } createdAt updatedAt deletedAt diff --git a/api/src/services/index.js b/api/src/services/index.js index e3de30a0..1087aef2 100644 --- a/api/src/services/index.js +++ b/api/src/services/index.js @@ -33,8 +33,7 @@ export class IndexService { index.did = did; } - index.ownerDID = this.getOwnerProfile(index); - delete index.controllerDID; + index.controllerDID = this.getOwnerProfile(index); return index; } async getIndexById(id) { diff --git a/api/src/services/item.js b/api/src/services/item.js index 373964f2..868facd7 100644 --- a/api/src/services/item.js +++ b/api/src/services/item.js @@ -1,6 +1,7 @@ import { ComposeClient } from "@composedb/client"; import { removePrefixFromKeys, getCurrentDateTime } from "../utils/helpers.js"; import { indexItemFragment } from "../types/fragments.js"; +import { IndexService } from "./index.js"; const transformIndexItem = (indexItem) => { const { __typename: type, indexedAt: _, ...rest } = indexItem.item; @@ -20,6 +21,7 @@ export class ItemService { definition, }); this.did = null; + this.indexService = new IndexService(definition); } setSession(session) { @@ -111,53 +113,64 @@ export class ItemService { async getIndexItems(indexId, cursor = null, limit = 24) { try { let cursorFilter = cursor ? `after: "${cursor}",` : ""; + const index = await this.indexService.getIndexById(indexId); let { data, errors } = await this.client.executeQuery(`{ - indexItemIndex(first: ${limit}, ${cursorFilter} filters: { + node(id: "${indexId}") { + ... 
on Index { + items( + first: ${limit} + ${cursorFilter} + account: "${index.controllerDID.id}" + filters: { where: { - indexId: { equalTo: "${indexId}"}, deletedAt: {isNull: true} } - }, sorting: { createdAt: DESC}) { - pageInfo { - endCursor - } - edges { - node { - ${indexItemFragment} - } + } + sorting: { createdAt: DESC} + ) { + pageInfo { + endCursor + } + edges { + node { + ${indexItemFragment} } } - }`); + } + } + } + }`); // Handle GraphQL errors if (errors) { throw new Error(`Error getting index item: ${JSON.stringify(errors)}`); } // Validate the data response - if (!data || !data.indexItemIndex || !data.indexItemIndex.edges) { + if (!data || !data.node || !data.node.items || !data.node.items.edges) { throw new Error("Invalid response data"); } - if (data.indexItemIndex.edges.length === 0) { + const items = data.node.items.edges; + + if (items.length === 0) { return { endCursor: null, items: [], }; } - data.indexItemIndex.edges.map((e) => { + items.map((e) => { e.node.item = removePrefixFromKeys( e.node.item, `${e.node.item.__typename}_`, ); return e; }); - console.log(data.indexItemIndex.edges); return { //Todo fix itemId to id - endCursor: data.indexItemIndex.pageInfo.endCursor, - items: data.indexItemIndex.edges.map((e) => transformIndexItem(e.node)), + endCursor: data.node.items.pageInfo.endCursor, + items: items.map((e) => transformIndexItem(e.node)), }; } catch (error) { // Log the error and rethrow it for external handling @@ -233,7 +246,7 @@ export class ItemService { const { data, errors } = await this.client.executeQuery(`{ indexItemIndex(first: ${limit}, ${cursorFilter} filters: { where: { - itemId: { in: ["${itemIds.join('","')}"]}, + itemId: { in: ${JSON.stringify(itemIds)}}, deletedAt: {isNull: true} } }, sorting: { createdAt: DESC}) { diff --git a/api/src/types/fragments.js b/api/src/types/fragments.js index 7000d53b..252f8086 100644 --- a/api/src/types/fragments.js +++ b/api/src/types/fragments.js @@ -46,6 +46,9 @@ export const 
indexItemFragment = ` id indexId itemId + controllerDID { + id + } modelId createdAt updatedAt @@ -60,6 +63,9 @@ export const indexItemFragment = ` title signerPublicKey signerFunction + controllerDID { + id + } createdAt updatedAt deletedAt diff --git a/indexer/README.md b/indexer/README.md index d91c5a08..cb5f4fed 100644 --- a/indexer/README.md +++ b/indexer/README.md @@ -42,8 +42,8 @@ Start NestJs application with the correct access with your environment. 1. Node>=21.x.x 1. yarn>=0.5.x - ### Build + ```sh yarn build yarn start:dev @@ -53,7 +53,7 @@ yarn start:dev ```mermaid sequenceDiagram - + participant ui as Web SDK participant api as Index API participant consumer as KafkaConsumer @@ -75,11 +75,11 @@ sequenceDiagram indexer->>+api: Return content api->>-compose: Index document to Ceramic - compose->>+consumer: Successfully indexed to Ceramic + compose->>+consumer: Successfully indexed to Ceramic consumer->>+api: Able to index Ceramic api->>+ui: Succesfully initializes indexing - + Note left of ui: Embedding consumer->>+indexer: Request embeddings for indexed document @@ -94,8 +94,8 @@ sequenceDiagram consumer->>+indexer: Index to ChromaDb with the embeddings indexer->>+chroma: Index docs with metadata and embeddings - chroma->>+indexer: Return Success - indexer->>+consumer: Publish Success + chroma->>+indexer: Return Success + indexer->>+consumer: Publish Success Note over ui, unstructured: Discovery Flow @@ -116,8 +116,8 @@ sequenceDiagram ``` - ## A. Indexer Controller + The Indexer Controller is designed for crawling, embedding extraction, and indexing operations. Below are the details of its endpoints: ### 1. Crawl Document Content @@ -126,10 +126,9 @@ The Indexer Controller is designed for crawling, embedding extraction, and index - **Endpoint**: /indexer/crawl - **Description**: Crawls the document content from a given URL using Unstructured.io API. - **Body Parameters**: - - `url (string)`: The URL of the document to crawl. 
+ - `url (string)`: The URL of the document to crawl. - **Response**: Returns a key-value pair of 'content' (string) representing the textual content of the document. - - **Example Usage** - curl request: @@ -158,10 +157,11 @@ The Indexer Controller is designed for crawling, embedding extraction, and index - **Endpoint**: /indexer/embeddings - **Description**: Extracts embeddings for the given document using OpenAI embeddings. - **Body Parameters**: - - `content (string)`: The textual content of the document. + - `content (string)`: The textual content of the document. - **Response**: Returns a list of floats representing the embedding vector. - **Example Usage** + - curl request ```sh @@ -191,20 +191,20 @@ The Indexer Controller is designed for crawling, embedding extraction, and index - **Endpoint**: /indexer/index - **Description**: Adds a document to the ChromaDB database with the appropriate metadata and content. - **Body Parameters**: An object containing the following keys: - - `indexId (string)`: The id string of the Index - - `indexTitle (string)`: The title of the index - - `indexCreatedAt (date)`: The create timestamp of index - - `indexUpdatedAt (date)`: The last update timestamp of index - - `indexDeletedAt (date)`: The delete timestamp of index - - `indexOwnerDID (string)`: The owner key of the index - - `webPageId (string)`: The id string of the webpage - - `webPageTitle (string)`: The title of the web page - - `webPageUrl (string)`: The url of the web page - - `webPageCreatedAt (date)`: The create timestamp of web page - - `webPageContent (string)`: The string of content of web page - - `webPageUpdatedAt (date)`: The last update timestamp of index - - `webPageDeletedAt (date)`: The delete timestamp of index - - `vector (number[])`: The embedding of the WebPageContent + - `indexId (string)`: The id string of the Index + - `indexTitle (string)`: The title of the index + - `indexCreatedAt (date)`: The create timestamp of index + - `indexUpdatedAt 
(date)`: The last update timestamp of index + - `indexDeletedAt (date)`: The delete timestamp of index + - `indexControllerDID (string)`: The DID of the controller (owner) of the index + - `webPageId (string)`: The id string of the webpage + - `webPageTitle (string)`: The title of the web page + - `webPageUrl (string)`: The URL of the web page + - `webPageCreatedAt (date)`: The create timestamp of web page + - `webPageContent (string)`: The string of content of web page + - `webPageUpdatedAt (date)`: The last update timestamp of web page + - `webPageDeletedAt (date)`: The delete timestamp of web page + - `vector (number[])`: The embedding of the WebPageContent - **Response**: Returns a success or error message. - **Example Usage** @@ -216,6 +216,7 @@ The Indexer Controller is designed for crawling, embedding extraction, and index -H 'X-API-KEY: your_api_key_here' \ -d '{"indexId": "1", "indexTitle": "Title", ...}' ``` + - Python request: ```python @@ -233,7 +234,6 @@ The Indexer Controller is designed for crawling, embedding extraction, and index # 200, { "message": "Index item IndexItemID_0 succesfully upddated" } ``` - ### 4. Update Document Metadata or Content - **Method**: `PUT` @@ -251,6 +251,7 @@ The Indexer Controller is designed for crawling, embedding extraction, and index -H "Content-Type: application/json" \ -d '{"indexId": "1", "indexTitle": "Updated Title"}' ``` + - Python request: ```python @@ -264,14 +265,14 @@ The Indexer Controller is designed for crawling, embedding extraction, and index # 200, { "message": "Index item IndexItemID_0 succesfully upddated" } ``` - ### 5. Delete Index - **Method**: `DELETE` - **Endpoint**: /indexer/index - **Description**: Deletes the given index items from the "indexId". - **Body Parameters**: An object with the key indexId. - - `indexId (string)`: The id string of the Index + + - `indexId (string)`: The id string of the Index - **Response**: Returns a success or error message. 
@@ -284,6 +285,7 @@ The Indexer Controller is designed for crawling, embedding extraction, and index -H "Content-Type: application/json" \ -d '{"indexId": "1"}' ``` + - Python request: ```python @@ -297,19 +299,20 @@ The Indexer Controller is designed for crawling, embedding extraction, and index # 200, { "message": "Index IndexID_0 wirh IndexItemIDS [ 'IndexItemID_3', 'IndexItemID_5' ] succesfully deleted" } ``` - ### 6. Delete Index Item - **Method**: `DELETE` -- **Endpoint**: ``/indexer/item`` +- **Endpoint**: `/indexer/item` - **Description**: Deletes the given index item from the "indexId" and "indexItemId". - **Body Parameters**: An object with the keys indexId and indexItemId. - - `indexId (string)`: The id string of the Index - - `indexItemId (string)`: The id string of the IndexItem + + - `indexId (string)`: The id string of the Index + - `indexItemId (string)`: The id string of the IndexItem - **Response**: Returns a success or error message. - **Example Usage** + - curl request: ```sh @@ -317,6 +320,7 @@ The Indexer Controller is designed for crawling, embedding extraction, and index -H "Content-Type: application/json" \ -d '{"indexId": "1", "indexItemId": "2"}' ``` + - Python request: ```python @@ -330,7 +334,6 @@ The Indexer Controller is designed for crawling, embedding extraction, and index # 200, { "message": "Index item IndexItemID_0 succesfully deleted" } ``` - ## B. Chat Controller The Chat Controller handles operations related to generating content based on a given input and querying the database. @@ -338,18 +341,19 @@ The Chat Controller handles operations related to generating content based on a ### 7. Generate Content for Question - **Method**: `POST` -- **Endpoint**: ``/chat/stream`` +- **Endpoint**: `/chat/stream` - **Description**: For a given "question" and "chat_history", generates content for the question. 
-- **Body Parameters**: An object containing - * `question (string)`: The string of last chat input - - * `chat_history (string)`: The list of input objects from both user and agent with message role and content - - * `indexIds (string[])`: The list of id strings to ask +- **Body Parameters**: An object containing + - `question (string)`: The string of last chat input + + - `chat_history (string)`: The list of input objects from both user and agent with message role and content + + - `indexIds (string[])`: The list of id strings to ask - **Response**: Returns "answer" text and "source" which are the list of "webPageId". - **Example Usage** + - curl request: ```sh @@ -357,6 +361,7 @@ The Chat Controller handles operations related to generating content based on a -H "Content-Type: application/json" \ -d '{"question": "What is AI?", "chat_history": "...", "index_id": "1", "model_type": "...", "chain_type": "..."}' ``` + - Python request: ```python @@ -379,32 +384,29 @@ The Chat Controller handles operations related to generating content based on a # } ``` - ## C. Query Controller - Below, you will find detailed descriptions and usage instructions for our three main endpoints: query, search, and autocomplete. - ### 1. Query Endpoint - - -- **Method**: ``POST`` -- **URL**: ``discovery/query`` +- **Method**: `POST` +- **URL**: `discovery/query` - **Description**: Returns a list of item results for user searches within specified index(es) using a given query string. It also supports [metadata filtering](https://docs.trychroma.com/usage-guide#using-where-filters) through ChromaDB . - **Body Parameters**: An object containing - - ``query (string)``: The query string to search. - - ``indexIds (string[])``: Array of index IDs to search within. - - ``page (int)``: The page number of results to return. - - ``limit (int)``: The number of results per page. - - ``filters (Object)``: Filters to apply on the search results (ChromaFilter). 
- - ``sort (int)``: The field to sort the results by. - - ``desc (int)``: Boolean indicating whether the sorting should be in descending order. + - `query (string)`: The query string to search. + + - `indexIds (string[])`: Array of index IDs to search within. + - `page (int)`: The page number of results to return. + - `limit (int)`: The number of results per page. + - `filters (Object)`: Filters to apply on the search results (ChromaFilter). + - `sort (string)`: The field to sort the results by. + - `desc (boolean)`: Whether the results should be sorted in descending order. -- **Response**: Returns "items" which are the list of "webPageId". +- **Response**: Returns "items" which are the list of "webPageId". - **Example Usage** + - curl request: ```sh @@ -412,6 +414,7 @@ Below, you will find detailed descriptions and usage instructions for our three -H "Content-Type: application/json" \ -d '{ "query": "string", "indexIds": ["string"], "page": 0, "limit": 0, "filters": { "indexCreatedAt": { "$gte": "2024-02-28T11:08:59.353Z" }, "sort": "string", "desc": true }' ``` + - Python request: ```python @@ -435,35 +438,32 @@ Below, you will find detailed descriptions and usage instructions for our three print(response.text) ``` - - - ### 2. Search Endpoint - - -- **Method**: ``POST`` -- **URL**: ``discovery/{db}/search/{indexIds}`` +- **Method**: `POST` +- **URL**: `discovery/{db}/search/{indexIds}` - **Description**: Performs a search using embeddings in ChromaDB. This endpoint is similar to the query endpoint but focuses on embedding-based searches and will support multiple embedding models in the future. - **Body Parameters**: An object containing - - ``embedding``: The embedding to search. - - - ``model``: The embedding model, eg. `text-embedding-ada-002`. + - `embedding`: The embedding to search. + + - `model`: The embedding model, e.g. `text-embedding-ada-002`. + + - `indexIds`: Array of index IDs to search within. 
- - ``indexIds``: Array of index IDs to search within. + - `page`: The page number of results to return. - - ``page``: The page number of results to return. + - `limit`: The number of results per page. - - ``limit``: The number of results per page. + - `filters`: Filters to apply on the search results (ChromaFilter). - - ``filters``: Filters to apply on the search results (ChromaFilter). + - `sort`: The field to sort the results by. - - ``sort``: The field to sort the results by. + - `desc`: Boolean indicating whether the sorting should be in descending order. - - ``desc``: Boolean indicating whether the sorting should be in descending order. -- **Response**: Returns "items" which are the list of "webPageId". +- **Response**: Returns "items" which are the list of "webPageId". - **Example Usage** + - curl request: ```sh @@ -472,6 +472,7 @@ Below, you will find detailed descriptions and usage instructions for our three -H 'X-API-KEY: your_api_key_here' \ -d '{ "embedding": [0.1, 0.2, ....], "indexIds": ["string"], "page": 0, "limit": 0, "filters": "ChromaFilter", "sort": "string", "desc": true }' ``` + - Python request: ```python @@ -497,18 +498,18 @@ Below, you will find detailed descriptions and usage instructions for our three # ] } ``` - ### 3. Autocomplete Endpoint -- **Method**: ``POST`` -- **URL**: ``discovery/autocomplete`` +- **Method**: `POST` +- **URL**: `discovery/autocomplete` - **Description**: Expands the given query via the openai.chat.completions endpoint to increase the specificity of the semantic content. It utilizes the content of the given index documents to provide autocomplete suggestions. - **Body Parameters**: - - `indexIds`: Array of index IDs to search within for autocomplete suggestions. - - `query`: The initial query string for which to provide autocomplete suggestions. - - `n`: The number of autocomplete suggestions to return. + - `indexIds`: Array of index IDs to search within for autocomplete suggestions. 
+ - `query`: The initial query string for which to provide autocomplete suggestions. + - `n`: The number of autocomplete suggestions to return. - **Response**: Returns "answer" text and "source" which are the list of "webPageId". - **Example Usage** + - curl request: ```sh @@ -517,6 +518,7 @@ Below, you will find detailed descriptions and usage instructions for our three -H 'X-API-KEY: your_api_key_here' \ -d '{ "indexIds": ["string"], "query": "string", "n": 0 }' ``` + - Python request: ```python @@ -536,5 +538,5 @@ Below, you will find detailed descriptions and usage instructions for our three # { "webPageItemId": "sknfljdfd", "similarity": 0.92 }, # { "webPageItemId": "mcdafşdş", "similarity": 0.81 }, # ...... - # ] + # ] ``` diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index bbcdbf01..d74058e6 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -13,7 +13,7 @@ export interface IIndex { owner: boolean; creator: boolean; }; - ownerDID: IUser; + controllerDID: IUser; createdAt: string; updatedAt: string; deletedAt: string; diff --git a/web-app/src/components/sections/IndexConversation/IndexConversationHeader.tsx b/web-app/src/components/sections/IndexConversation/IndexConversationHeader.tsx index d97dfaa1..5f1feb71 100644 --- a/web-app/src/components/sections/IndexConversation/IndexConversationHeader.tsx +++ b/web-app/src/components/sections/IndexConversation/IndexConversationHeader.tsx @@ -126,14 +126,17 @@ export const IndexConversationHeader: FC = () => { display: "flex", width: "fit-content", }} - href={`/${viewedIndex?.ownerDID?.id!}`} + href={`/${viewedIndex?.controllerDID?.id!}`} >