Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to be consistent with v4 of algoliasearch #39

Merged
merged 7 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:

strategy:
matrix:
python-version: ['3.10', '3.11', '3.12', '3.13']
python-version: ['3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

_version.py
84 changes: 43 additions & 41 deletions astropylibrarian/algolia/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,19 @@
dry-run operations.
"""

from __future__ import annotations

import logging
import uuid
from copy import deepcopy
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
Iterator,
List,
Optional,
Type,
Union,
)

from algoliasearch.search_client import SearchClient

if TYPE_CHECKING:
from types import TracebackType
from types import TracebackType
from typing import Any, AsyncIterator, Iterator, Type, Union

from algoliasearch.search_index_async import SearchIndexAsync
from algoliasearch.search.client import SearchClient
from algoliasearch.search.models.batch_response import BatchResponse
from algoliasearch.search.models.browse_params_object import BrowseParamsObject
from algoliasearch.search.models.browse_response import BrowseResponse
from algoliasearch.search.models.deleted_at_response import DeletedAtResponse


AlgoliaIndexType = Union["SearchIndexAsync", "MockAlgoliaIndex"]
AlgoliaIndexType = Union["AlgoliaIndex", "MockAlgoliaIndex"]
"""Type annotation alias supporting the return types of the `AlgoliaIndex` and
`MockAlgoliaIndex` context managers.
"""
Expand Down Expand Up @@ -80,23 +67,36 @@ class AlgoliaIndex(BaseAlgoliaIndex):
Name of the Algolia index.
"""

async def __aenter__(self) -> SearchIndexAsync:
async def __aenter__(self) -> SearchClient:
self._logger.debug("Opening algolia client")
self.algolia_client = SearchClient.create(self.app_id, self._key)
self._logger.debug("Initializing algolia index")
self.index = self.algolia_client.init_index(self.name)
return self.index
self.algolia_client = SearchClient(self.app_id, self._key)
return self.algolia_client

async def __aexit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[Exception],
tb: Optional[TracebackType],
exc_type: Type[BaseException] | None,
exc: Exception | None,
tb: TracebackType | None,
) -> None:
self._logger.debug("Closing algolia client")
await self.algolia_client.close_async()
await self.algolia_client.close()
self._logger.debug("Finished closing algolia client")

async def browse_objects_async(
    self, browse_params: BrowseParamsObject
) -> BrowseResponse:
    """Browse this index with the given parameters.

    Delegates to the Algolia client's ``browse_objects`` for the index
    named ``self.name``, passing no aggregator callback.

    Parameters
    ----------
    browse_params
        Browse settings forwarded unchanged to the client.

    Returns
    -------
    BrowseResponse
        Whatever the awaited client call returns.
    """
    # NOTE(review): this coroutine returns one awaited value rather than
    # an async iterator; confirm against callers that consume the result
    # with ``async for``.
    client = self.algolia_client
    response = await client.browse_objects(
        index_name=self.name,
        aggregator=None,
        browse_params=browse_params,
    )
    return response

async def save_objects_async(
    self, objects: list[dict[str, Any]]
) -> list[BatchResponse]:
    """Save records to this index through the Algolia client.

    Parameters
    ----------
    objects
        Records to save, forwarded unchanged to the client's
        ``save_objects`` for the index named ``self.name``.

    Returns
    -------
    list of BatchResponse
        The result of the awaited client call.
    """
    # The client is used asynchronously elsewhere in this class (its
    # ``close`` and ``browse_objects`` calls are awaited). Without the
    # ``await`` here, callers that await this method would get back an
    # un-awaited coroutine and the save would never run.
    return await self.algolia_client.save_objects(self.name, objects)

async def delete_objects_async(
    self, objectids: list[str]
) -> list[BatchResponse]:
    """Delete records from this index through the Algolia client.

    Parameters
    ----------
    objectids
        Object IDs to delete, forwarded unchanged to the client's
        ``delete_objects`` for the index named ``self.name``.

    Returns
    -------
    list of BatchResponse
        The result of the awaited client call.
    """
    # The client is used asynchronously elsewhere in this class (its
    # ``close`` and ``browse_objects`` calls are awaited). Without the
    # ``await`` here, callers that await this method would get back an
    # un-awaited coroutine and the deletion would never run.
    return await self.algolia_client.delete_objects(self.name, objectids)


class MockAlgoliaIndex(BaseAlgoliaIndex):
"""A mock Algolia index client.
Expand All @@ -117,39 +117,41 @@ class MockAlgoliaIndex(BaseAlgoliaIndex):

async def __aenter__(self) -> "MockAlgoliaIndex":
self._logger.debug("Creating mock Algolia index")
self._saved_objects: List[Dict] = []
self._saved_objects: list[dict] = []
return self

async def __aexit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[Exception],
tb: Optional[TracebackType],
exc_type: Type[BaseException] | None,
exc: Exception | None,
tb: TracebackType | None,
) -> None:
self._logger.debug("Closing MockAlgoliaIndex")

async def save_objects_async(
self,
objects: Union[List[Dict], Iterator[Dict]],
request_options: Optional[Dict[str, Any]] = None,
) -> MockMultiResponse:
objects: list[dict] | Iterator[dict],
request_options: dict[str, Any] | None = None,
) -> "MockMultiResponse":
"""Mock implementation of save_objects_async."""
for obj in objects:
self._saved_objects.append(deepcopy(obj))
return MockMultiResponse()

async def browse_objects_async(
self, search_settings: Dict[str, Any]
) -> AsyncIterator[Dict[str, Any]]:
self, search_settings: dict[str, Any]
) -> AsyncIterator[dict[str, Any]]:
self._logger.debug("Got search settings %s", search_settings)
# FIXME need to flesh out this mock:
# - provide a way to seed data
# - use attributesToRetrieve to inform what attributes are sent back
for _ in range(5):
yield {}

async def delete_objects_async(self, objectids: List[str]) -> List[str]:
return objectids
async def delete_objects_async(
self, objectids: list[str]
) -> list[DeletedAtResponse]:
return [DeletedAtResponse(task_id=0, deleted_at="") for _ in objectids]


class MockMultiResponse:
Expand Down
36 changes: 14 additions & 22 deletions astropylibrarian/workflows/deleterooturl.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,23 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Workflow for deleting all Algolia records associated with a root URL."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from astropylibrarian.algolia.client import escape_facet_value
from typing import Any, AsyncIterator

if TYPE_CHECKING:
from typing import Any, AsyncIterator, Dict, List
from algoliasearch.search.models.browse_params_object import BrowseParamsObject

from astropylibrarian.algolia.client import AlgoliaIndexType
from astropylibrarian.algolia.client import AlgoliaIndexType, escape_facet_value

logger = logging.getLogger(__name__)


async def delete_root_url(
*, root_url: str, algolia_index: AlgoliaIndexType
) -> List[str]:
) -> list[str]:
"""Delete all Algolia records associated with a ``root_url``."""
object_ids: List[str] = []
object_ids: list[str] = []
async for record in search_for_records(
index=algolia_index, root_url=root_url
algolia_index=algolia_index, root_url=root_url
):
if record["root_url"] != root_url:
logger.warning(
Expand All @@ -35,25 +30,22 @@ async def delete_root_url(

logger.debug("Found %d objects for deletion", len(object_ids))

response = await algolia_index.delete_objects_async(object_ids)
logger.debug("Algolia response:\n%s", response.raw_responses)
responses = await algolia_index.delete_objects_async(object_ids)
logger.debug("Algolia response:\n%s", responses)

logger.info("Deleted %d objects", len(object_ids))

return object_ids


async def search_for_records(
*, index: AlgoliaIndexType, root_url: str
) -> AsyncIterator[Dict[str, Any]]:
*, algolia_index: AlgoliaIndexType, root_url: str
) -> AsyncIterator[dict[str, Any]]:
filters = f"root_url:{escape_facet_value(root_url)}"
logger.debug("Filter:\n%s", filters)

async for result in index.browse_objects_async(
{
"filters": filters,
"attributesToRetrieve": ["root_url"],
"attributesToHighlight": [],
}
):
obj = BrowseParamsObject(
filters=filters, attributes_to_retrieve=["root_url"], attributes_to_highlight=[]
)
async for result in algolia_index.browse_objects_async(obj):
yield result
19 changes: 10 additions & 9 deletions astropylibrarian/workflows/expirerecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import logging
from typing import TYPE_CHECKING

from algoliasearch.search.models.browse_params_object import BrowseParamsObject

from astropylibrarian.algolia.client import escape_facet_value

if TYPE_CHECKING:
Expand All @@ -27,21 +29,20 @@ async def expire_old_records(
" AND NOT "
f"root_url:{escape_facet_value(root_url)}"
)
search_settings = {
"filters": filters,
"attributesToRetrieve": ["root_url", "index_epoch"],
"attributesToHighlight": [],
}

obj = BrowseParamsObject(
filters=filters,
attributes_to_retrieve=["root_url", "index_epoch"],
attributes_to_highlight=[],
)
old_object_ids: List[str] = []
async for r in algolia_index.browse_objects_async(search_settings):
async for r in algolia_index.browse_objects_async(obj):
# Double check that we're deleting the right things.
if r["root_url"] != root_url:
logger.warning("root_url does not match: %s", r["baseUrl"])
continue
if r["surrogateKey"] == index_epoch:
logger.warning(
"index_epoch matches current epoch: %s", r["index_epoch"]
)
logger.warning("index_epoch matches current epoch: %s", r["index_epoch"])
continue
old_object_ids.append(r["objectID"])

Expand Down
4 changes: 2 additions & 2 deletions astropylibrarian/workflows/indextutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, List

import algoliasearch.exceptions
from algoliasearch.http.exceptions import RequestException

from astropylibrarian.algolia.client import generate_index_epoch
from astropylibrarian.reducers.tutorial import get_tutorial_reducer
Expand Down Expand Up @@ -171,7 +171,7 @@ async def index_tutorial(
saved_object_ids: List[str] = []
try:
response = await algolia_index.save_objects_async(records)
except algoliasearch.exceptions.RequestException as e:
except RequestException as e:
logger.error(
"Error saving objects for tutorial %s:\n%s",
tutorial_html.url,
Expand Down
75 changes: 67 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,46 @@
[project]
name = "astropy-librarian"
authors = [
{name = "J.Sick Codes Inc.", email = "hi@jsick.codes"},
{name = "Jeff Jennings", email = "jeffjennings@users.noreply.github.com"},
{name = "Adrian Price-Whelan", email = "adrian.prw@gmail.com"},
]
license = {text = "BSD 3-Clause License"}
description = "The content crawler that supplies Astropy's web search."
readme = {file = "README.rst", content-type = "text/x-rst"}
requires-python = ">=3.11"
classifiers = [
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: BSD License",
]
dependencies = [
"lxml",
"cssselect",
"algoliasearch>=4,<5",
"aiohttp",
"async_timeout",
"PyYAML",
"pydantic",
"typer",
"more-itertools",
]
dynamic = ["version"]


[project.optional-dependencies]
dev = [
"pytest>=6.1",
"pytest-doctestplus",
"types-setuptools",
"types-PyYAML",
]

[project.urls]
Homepage = "https://github.com/jonathansick/astropy-librarian"

[project.scripts]
astropylibrarian = "astropylibrarian.cli.app:app"

[build-system]
requires = [
"setuptools>=64",
Expand All @@ -9,19 +52,35 @@ build-backend = "setuptools.build_meta"
[tool.setuptools_scm]
version_file = "astropylibrarian/_version.py"

[tool.pytest.ini_options]
doctest_plus = "enabled"

[tool.flake8]
max-line-length = 79

[tool.mypy]
disallow_untyped_defs = true
disallow_incomplete_defs = true
ignore_missing_imports = true
show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_unreachable = true
warn_unused_ignores = true

[tool.black]
line-length = 79
target-version = ["py311"]
exclude = '''
/(
\.eggs
| \.git
| \.mypy_cache
| \.tox
| \.venv
| _build
| build
| dist
\.eggs
| \.git
| \.mypy_cache
| \.tox
| \.venv
| _build
| build
| dist
)/
'''

Expand Down
Loading