Skip to content

Commit

Permalink
docs: use algolia for searchability (#8410)
Browse files Browse the repository at this point in the history
  • Loading branch information
ncclementi authored Feb 21, 2024
1 parent ff5d078 commit f8d8ca3
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 0 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/ibis-docs-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,21 @@ jobs:
run: nix develop --ignore-environment --keep NETLIFY_AUTH_TOKEN -c just docs-deploy
env:
NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}

# Upload index related
- name: Setup Python
uses: actions/setup-python@v5
with:
          python-version: "3.10"

- name: Install Algolia API Dependencies
run: |
python -m pip install --upgrade algoliasearch
- name: Create and Upload Index
run: |
python .github/workflows/upload-algolia.py
env:
ALGOLIA_WRITE_API_KEY: ${{ secrets.ALGOLIA_WRITE_API_KEY }}
ALGOLIA_APP_ID: HS77W8GWM1
ALGOLIA_INDEX: prod_ibis
60 changes: 60 additions & 0 deletions .github/workflows/upload-algolia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations # noqa: INP001

import json
import os
from urllib.request import urlopen

from algoliasearch.search_client import SearchClient

# Algolia credentials/configuration injected by CI (see the workflow step
# that runs this script); os.environ[...] raises KeyError if any is unset,
# which fails the job loudly rather than uploading with bad credentials.
api_key = os.environ["ALGOLIA_WRITE_API_KEY"]
app_id = os.environ["ALGOLIA_APP_ID"]
index_name = os.environ["ALGOLIA_INDEX"]


def truncate_string(string: str, max_size: int) -> str:
    """Return ``string`` truncated so its UTF-8 encoding is < ``max_size`` bytes.

    The string is encoded to UTF-8, cut to at most ``max_size - 1`` bytes,
    and decoded with ``errors="ignore"`` so that a multi-byte character
    split by the byte cut is dropped instead of raising
    ``UnicodeDecodeError``.

    Parameters
    ----------
    string
        Text to truncate.
    max_size
        Exclusive upper bound, in bytes, for the UTF-8 size of the result.

    Returns
    -------
    str
        A prefix of ``string`` whose UTF-8 encoding is smaller than
        ``max_size`` bytes (empty when ``max_size <= 0``).
    """
    # Encode the string to bytes: the Algolia limit is byte-based, not
    # character-based.
    encoded_string = string.encode("utf-8")

    # max(..., 0) guards max_size <= 0: a bare `max_size - 1` would be a
    # negative slice index, keeping almost the whole string instead of none.
    truncated_bytes = encoded_string[: max(max_size - 1, 0)]

    # errors="ignore" silently drops a trailing multi-byte character that
    # the byte-level cut may have split in half.
    return truncated_bytes.decode("utf-8", errors="ignore")


def main() -> None:
    """Download the quarto-generated search index from the ibis website,
    truncate oversized records, and upload the result to Algolia.

    Reads the module-level ``app_id``/``api_key``/``index_name`` (from the
    CI environment) and replaces the entire remote index.
    """
    client = SearchClient.create(app_id, api_key)
    index = client.init_index(index_name)

    # Download the index generated by quarto from the ibis website
    with urlopen("https://ibis-project.org/search.json") as response:
        search = json.loads(response.read())

    # According to algolia docs, for the build plan each record (in our case this
    # is search[i]) has a limit of 10KB.
    # (see https://support.algolia.com/hc/en-us/articles/4406981897617-Is-there-a-size-limit-for-my-index-records and
    # https://www.algolia.com/doc/guides/scaling/algolia-service-limits/)
    # Every key in our record is pretty small except for the "text" one.
    # I tried truncating it to < 10_000 and even though we don't get a record
    # size error, we keep hitting an AlgoliaUnreachableHostException.
    # I opened an issue because the error is unhelpful and unclear.
    # https://github.com/algolia/algoliasearch-client-python/issues/565

    # It wasn't until I cut the "text" field to max_size=1000 that I was able to
    # get an index. My guess is we are hitting another limitation, but I was not
    # able to find anything on the docs.

    max_size = 1_000
    for obj in search:
        if len(obj["text"].encode("utf-8")) > max_size:
            obj["text"] = truncate_string(obj["text"], max_size)
        # Raise instead of `assert`: asserts are stripped under `python -O`,
        # and an oversized record must fail loudly *before* the upload.
        size = len(obj["text"].encode("utf-8"))
        if size >= max_size:
            raise ValueError(
                f"record text is {size} bytes; must be smaller than {max_size}"
            )

    index.replace_all_objects(search)


# Guard the entry point so importing this module never triggers an upload.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ website:
search:
location: navbar
type: overlay
algolia:
index-name: prod_ibis
application-id: HS77W8GWM1
search-only-api-key: 8ca4fcd24da322db857322ae4f79d6f3
analytics-events: true
show-logo: true

# options
reader-mode: false
Expand Down

0 comments on commit f8d8ca3

Please sign in to comment.