oramasearch · micheleriva · Aug 4, 2023 · Aug 3, 2023 · Aug 3, 2023 · Aug 3, 2023
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@
 </h4>
 <br />
 <p align="center">
-  A resilient, innovative and open-source search experience to achieve <br />
+  A resilient, innovative and open-source full-text and vector search experience to achieve <br />
   seamless integration with your infrastructure and data
 </p>
 <br />
@@ -29,10 +29,11 @@
 
 If you need more info, help, or want to provide general feedback on Orama, join
 the
-[Orama Slack channel](https://join.slack.com/t/orama-community/shared_invite/zt-1gzvj0mmt-yJhJ6pnrSGuwqPmPx9uO5Q)
+[Orama Slack channel](https://orama.to/slack)
 
 # Highlighted features
 
+- [Vector Search](https://docs.oramasearch.com/usage/search/vector-search)
 - [Search filters](https://docs.oramasearch.com/usage/search/filters)
 - [Facets](https://docs.oramasearch.com/usage/search/facets)
 - [Fields Boosting](https://docs.oramasearch.com/usage/search/fields-boosting)
@@ -78,13 +79,14 @@ Orama is quite simple to use. The first thing to do is to create a new database
 instance and set an indexing schema:
 
 ```js
-import { create, insert, remove, search } from '@orama/orama'
+import { create, insert, remove, search, searchVector } from '@orama/orama'
 
 const db = await create({
   schema: {
     name: 'string',
     description: 'string',
     price: 'number',
+    embedding: 'vector[1536]', // Vector size must be specified when the schema is created
     meta: {
       rating: 'number',
     },
@@ -104,6 +106,7 @@ await insert(db, {
   name: 'Wireless Headphones',
   description: 'Experience immersive sound quality with these noise-cancelling wireless headphones.',
   price: 99.99,
+  embedding: [...],
   meta: {
     rating: 4.5,
   },
@@ -113,6 +116,7 @@ await insert(db, {
   name: 'Smart LED Bulb',
   description: 'Control the lighting in your home with this energy-efficient smart LED bulb, compatible with most smart home systems.',
   price: 24.99,
+  embedding: [...],
   meta: {
     rating: 4.3,
   },
@@ -122,6 +126,7 @@ await insert(db, {
   name: 'Portable Charger',
   description: 'Never run out of power on-the-go with this compact and fast-charging portable charger for your devices.',
   price: 29.99,
+  embedding: [...],
   meta: {
     rating: 3.6,
   },
@@ -198,6 +203,15 @@ Result:
 }
 ```
 
+If you want to perform a vector search, you can use the `searchVector` function:
+
+```js
+const searchResult = await searchVector(db, {
+  vector: [...], // OpenAI embedding or similar vector to be used as an input
+  property: 'embedding' // Property to search through. Mandatory for vector search
+})
+```
+
 # Usage with CommonJS
 
 Orama is packaged as ES modules, suitable for Node.js, Deno, Bun and modern browsers.

diff --git a/packages/docs/pages/_meta.json b/packages/docs/pages/_meta.json
@@ -3,6 +3,10 @@
     "type": "menu",
     "title": "Search Features",
     "items": {
+      "vector-search": {
+        "title": "Vector Search",
+        "href": "/usage/search/vector-search"
+      },
       "typo-tolerance": {
         "title": "Typo Tolerance",
         "href": "/usage/search/introduction#typo-tolerance"

diff --git a/packages/docs/pages/index.mdx b/packages/docs/pages/index.mdx
@@ -4,7 +4,7 @@ import { AiFillFileAdd, AiOutlineSearch, AiFillDelete } from 'react-icons/ai'
 
 # Getting Started with Orama
 
-Orama is a fast, batteries-included, full-text search engine entirely written in TypeScript, with zero dependencies. <br /><br />
+Orama is a fast, batteries-included, full-text and vector search engine entirely written in TypeScript, with zero dependencies. <br /><br />
 
 <iframe
   width="100%"

diff --git a/packages/docs/pages/usage/create.mdx b/packages/docs/pages/usage/create.mdx
@@ -18,14 +18,15 @@ If you want to learn more and see real-world examples, check out [this blog post
 The `schema` is an object where the keys are the property names and the values are the property types. \
 Orama supports the following types:
 
-| Type        | Description                                                                 | example                                                                     |
-| ----------- | --------------------------------------------------------------------------- | --------------------------------------------------------------------------- |
-| `string`    | A string of characters.                                                     | `'Hello world'`                                                             |
-| `number`    | A numeric value, either float or integer.                                   | `42`                                                                        |
-| `boolean`   | A boolean value.                                                            | `true`                                                                      |
-| `string[]`  | An array of strings.                                                        | `['red', 'green', 'blue']`                                                  |
-| `number[]`  | An array of numbers.                                                        | `[42, 91, 28.5]`                                                            |
-| `boolean[]` | An array of booleans.                                                       | `[true, false, false]`                                                      |
+| Type             | Description                                                                 | example                                                                     |
+| ---------------- | --------------------------------------------------------------------------- | --------------------------------------------------------------------------- |
+| `string`         | A string of characters.                                                     | `'Hello world'`                                                             |
+| `number`         | A numeric value, either float or integer.                                   | `42`                                                                        |
+| `boolean`        | A boolean value.                                                            | `true`                                                                      |
+| `string[]`       | An array of strings.                                                        | `['red', 'green', 'blue']`                                                  |
+| `number[]`       | An array of numbers.                                                        | `[42, 91, 28.5]`                                                            |
+| `boolean[]`      | An array of booleans.                                                       | `[true, false, false]`                                                      |
+| `vector[<size>]` | A vector of numbers to perform vector search on.                            | `[0.403, 0.192, 0.830]`                                                               |
 
 A database can be as simple as:
 
@@ -75,6 +76,29 @@ const movieDB = await create({
 })
 ```
 
+## Vector properties
+
+Since version `1.2.0`, Orama supports vector search. \
+To run vector queries, you first need to initialize a vector property in the schema:
+
+```javascript copy
+const db = await create({
+  schema: {
+    title: 'string',
+    embedding: 'vector[384]',
+  }
+})
+```
+
+Please note that the size of the vector **must** be specified in the schema. \
+The size of the vector is the number of elements it contains. Make sure to specify the correct size: searching across vectors of different sizes produces unpredictable and mostly wrong results.
+
+If you're using vector properties to search through embeddings, we highly recommend using [HuggingFace's](https://huggingface.co/) `gte-small` model, which has a vector size of `384`.
+
+There is a great article written by Supabase explaining why it might be a better option than OpenAI's `text-embedding-ada-002` model: [https://supabase.com/blog/fewer-dimensions-are-better-pgvector](https://supabase.com/blog/fewer-dimensions-are-better-pgvector).
+
+For performance reasons, we recommend using one vector property per database, even though it's possible to have multiple vector properties in the same Orama instance.
+
 ## Instance ID
 
 Every Orama instance has a unique `id` property, which can be used to identify a given instance when working with multiple databases.

diff --git a/packages/docs/pages/usage/search/_meta.json b/packages/docs/pages/usage/search/_meta.json
@@ -1,5 +1,6 @@
 {
   "introduction": "Searching with Orama",
+  "vector-search": "Vector Search",
   "fields-boosting": "Fields boosting",
   "facets": "Facets",
   "filters": "Filters",

diff --git a/packages/docs/pages/usage/search/vector-search.mdx b/packages/docs/pages/usage/search/vector-search.mdx
@@ -0,0 +1,73 @@
+import { Callout } from 'nextra-theme-docs'
+
+# Vector Search
+
+Since `v1.2.0`, Orama supports **vector search** natively 🎉.
+
+To search through vectors, you first need to configure your Orama schema correctly, as described in the [create page](/usage/create).
+
+## Performing Vector Search
+
+To perform vector search, you will need to use a new method called `searchVector`, which can be imported from `@orama/orama`:
+
+```js copy
+import { searchVector } from '@orama/orama'
+```
+
+The APIs are very similar to the ones you already know, but with a few differences:
+
+1. Instead of searching for a `term`, you will need to provide a `vector` to search for.
+2. You will need to specify the vector property you want to search on.
+3. At the time of writing, you can only search through one vector property at a time. If you think that this is too limiting, please open a [feature request](https://github.com/oramasearch/orama/issues/new?assignees=&labels=&projects=&template=feature_request.md&title=) to support multiple vector properties at search-time.
+
+Let's see a full example of how to perform vector search:
+
+```js copy
+import { create, insertMultiple, searchVector } from '@orama/orama'
+
+const db = await create({
+  schema: {
+    title: 'string',        // To make it simple, let's pretend that
+    embedding: 'vector[5]', // we are using a 5-dimensional vector.
+  }
+})
+
+await insertMultiple(db, [
+  { title: 'The Prestige', embedding: [0.938293, 0.284951, 0.348264, 0.948276, 0.564720] },
+  { title: 'Barbie',       embedding: [0.192839, 0.028471, 0.284738, 0.937463, 0.092827] },
+  { title: 'Oppenheimer',  embedding: [0.827391, 0.927381, 0.001982, 0.983821, 0.294841] },
+])
+
+const results = await searchVector(db, {
+  vector: [0.938292, 0.284961, 0.248264, 0.748276, 0.264720],
+  property: 'embedding',
+  similarity: 0.8,      // Minimum similarity. Defaults to `0.8`
+  includeVectors: true, // Defaults to `false`
+  limit: 10,            // Defaults to `10`
+  offset: 0,            // Defaults to `0`
+})
+```
+
+The returned object has exactly the same shape as the one returned by the default `search` method:
+
+```js
+{
+  count: 1,
+  elapsed: {
+    raw: 25000,
+    formatted: '25μs',
+  },
+  hits: [
+    {
+      id: '1-19238',
+      score: 0.812383129,
+      document: {
+        title: 'The Prestige',
+        embedding: [0.938293, 0.284951, 0.348264, 0.948276, 0.564720],
+      }
+    }
+  ]
+}
+```
+
+Since vectors can be quite large, you can also choose to not include them in the response by setting `includeVectors` to `false` (default behavior).
diff --git a/packages/orama/README.md b/packages/orama/README.md
@@ -13,6 +13,7 @@ If you need more info, help, or want to provide general feedback on Orama, join
 
 # Highlighted features
 
+- [Vector Search](https://docs.oramasearch.com/usage/search/vector-search)
 - [Search filters](https://docs.oramasearch.com/usage/search/filters)
 - [Facets](https://docs.oramasearch.com/usage/search/facets)
 - [Fields Boosting](https://docs.oramasearch.com/usage/search/fields-boosting)
@@ -58,13 +59,14 @@ Orama is quite simple to use. The first thing to do is to create a new database
 instance and set an indexing schema:
 
 ```js
-import { create, insert, remove, search } from '@orama/orama'
+import { create, insert, remove, search, searchVector } from '@orama/orama'
 
 const db = await create({
   schema: {
     name: 'string',
     description: 'string',
     price: 'number',
+    embedding: 'vector[1536]', // Vector size must be specified when the schema is created
     meta: {
       rating: 'number',
     },
@@ -84,26 +86,27 @@ await insert(db, {
   name: 'Wireless Headphones',
   description: 'Experience immersive sound quality with these noise-cancelling wireless headphones.',
   price: 99.99,
+  embedding: [...],
   meta: {
     rating: 4.5,
   },
 })
 
 await insert(db, {
   name: 'Smart LED Bulb',
-  description:
-    'Control the lighting in your home with this energy-efficient smart LED bulb, compatible with most smart home systems.',
+  description: 'Control the lighting in your home with this energy-efficient smart LED bulb, compatible with most smart home systems.',
   price: 24.99,
+  embedding: [...],
   meta: {
     rating: 4.3,
   },
 })
 
 await insert(db, {
   name: 'Portable Charger',
-  description:
-    'Never run out of power on-the-go with this compact and fast-charging portable charger for your devices.',
+  description: 'Never run out of power on-the-go with this compact and fast-charging portable charger for your devices.',
   price: 29.99,
+  embedding: [...],
   meta: {
     rating: 3.6,
   },
@@ -180,6 +183,15 @@ Result:
 }
 ```
 
+If you want to perform a vector search, you can use the `searchVector` function:
+
+```js
+const searchResult = await searchVector(db, {
+  vector: [...], // OpenAI embedding or similar vector to be used as an input
+  property: 'embedding' // Property to search through. Mandatory for vector search
+})
+```
+
 # Usage with CommonJS
 
 Orama is packaged as ES modules, suitable for Node.js, Deno, Bun and modern browsers.

diff --git a/packages/orama/src/cjs/index.cts b/packages/orama/src/cjs/index.cts
@@ -3,6 +3,7 @@ import type { count as esmCount, getByID as esmGetByID } from '../methods/docs.j
 import type { insert as esmInsert, insertMultiple as esminsertMultiple } from '../methods/insert.js'
 import type { remove as esmRemove, removeMultiple as esmRemoveMultiple } from '../methods/remove.js'
 import type { search as esmSearch } from '../methods/search.js'
+import type { searchVector as esmSearchVector } from '../methods/search-vector.js'
 import type { load as esmLoad, save as esmSave } from '../methods/serialization.js'
 import type { update as esmUpdate, updateMultiple as esmUpdateMultiple } from '../methods/update.js'
 
@@ -18,6 +19,7 @@ let _esmSave: typeof esmSave
 let _esmSearch: typeof esmSearch
 let _esmUpdate: typeof esmUpdate
 let _esmUpdateMultiple: typeof esmUpdateMultiple
+let _esmSearchVector: typeof esmSearchVector
 
 export async function count(...args: Parameters<typeof esmCount>): ReturnType<typeof esmCount> {
   if (!_esmCount) {
@@ -133,5 +135,16 @@ export async function updateMultiple(
   return _esmUpdateMultiple(...args)
 }
 
+export async function searchVector( // CommonJS entry point for `searchVector`; forwards every argument to the ESM implementation
+  ...args: Parameters<typeof esmSearchVector>
+): ReturnType<typeof esmSearchVector> {
+  if (!_esmSearchVector) { // Module-level cache is still empty, so the ESM function has not been stored yet
+    const imported = await import('../methods/search-vector.js') // Dynamic import, performed only while the cache is empty
+    _esmSearchVector = imported.searchVector // Store the function so later calls skip the import
+  }
+
+  return _esmSearchVector(...args)
+}
+
 export * as components from './components/defaults.cjs'
 export * as internals from './internals.cjs'
diff --git a/packages/orama/src/components/cosine-similarity.ts b/packages/orama/src/components/cosine-similarity.ts
@@ -0,0 +1,43 @@
+import type { Magnitude, VectorType } from '../types.js'
+
+export type SimilarVector = { // One entry of the array returned by `findSimilarVectors`
+  id: string // Key of the matching vector in the `vectors` record
+  score: number // Similarity computed between the target vector and this vector
+}
+
+export function getMagnitude(vector: Float32Array, vectorLength: number): number { // Euclidean (L2) norm over the first `vectorLength` components of `vector`
+  let magnitude = 0 // Holds the running sum of squares; the square root is taken only at the return
+  for (let i = 0; i < vectorLength; i++) { // Bound comes from `vectorLength`, not `vector.length`; indices past the end read as `undefined` and make the result NaN
+    magnitude += vector[i] * vector[i]
+  }
+  return Math.sqrt(magnitude)
+}
+
+// @todo: Write plugins for Node and Browsers to use parallel computation for this function
+export function findSimilarVectors( // Scores each stored vector by cosine similarity to `targetVector` and returns those at or above `threshold`, best first
+  targetVector: Float32Array, // Query vector
+  vectors: Record<string, [Magnitude, VectorType]>, // Vector id -> [magnitude, vector]; the magnitude is used as given and is not recomputed here
+  length: number, // Number of components read from the target and from every stored vector
+  threshold = 0.8 // Minimum similarity (inclusive) for a vector to be included in the result
+) {
+  const targetMagnitude = getMagnitude(targetVector, length); // Computed once and reused for every candidate
+
+  const similarVectors: SimilarVector[] = []
+
+  for (const [vectorId, [magnitude, vector]] of Object.entries(vectors)) {
+    let dotProduct = 0
+
+    for (let i = 0; i < length; i++) {
+      dotProduct += targetVector[i] * vector[i]
+    }
+
+    const similarity = dotProduct / (targetMagnitude * magnitude) // Cosine similarity; the division is unguarded, so a zero magnitude gives NaN (or ±Infinity) rather than an error
+
+    if (similarity >= threshold) { // A NaN similarity fails this comparison and is skipped
+      similarVectors.push({ id: vectorId, score: similarity })
+    }
+  }
+
+  return similarVectors.sort((a, b) => b.score - a.score) // Descending by score
+}
+