Skip to content

Commit

Permalink
Zotero pagination (#288)
Browse files Browse the repository at this point in the history
* Add responses library for mocking requests

* Mock Zotero API and bibtex entries

Instead of setting up a testing environment in Zotero, I have mocked the
API documented in [Zotero API
docs](https://www.zotero.org/support/dev/web_api/v3/basics) especially
with the pagination feature that splits the bibtex into chunks with a
maximum length that they call 'limit'. In the fixture, the limit is set
to 25 or the default.

Along with the mock API, I needed a function that can generate n bibtex
entries to test paginated and non-paginated results.

* Add test loading bibtex from Zotero

To test paginated and non-paginated responses, this test parametrizes
the mock_zotero_api fixture with API endpoints with 4 and 150 results
respectively.

* Add function to handle Zotero API requests

Zotero API's "Sorting and Pagination"
[docs](https://www.zotero.org/support/dev/web_api/v3/basics) explain
that each request returns a limited number of results, and that this
limit can be raised to a maximum of 100 results per request. If more
results match the search criteria, a "Link" header is added with a
`rel=next` link. The `tempfile_from_zotero_url` function leverages that
response header and keeps requesting the `next` URL until all results
have been returned. To prevent runaway conditions, an arbitrary hard
limit of 999 pages has been placed.

* Sanitize Zotero API URL before request

To prevent the user from adding problematic query params to the Zotero
URL, the `sanitize_zotero_query()` function ensures that the requested
format is bibtex and the limit is set to the maximum permitted by Zotero
to reduce the total number of requests.

The mocked Zotero API needs to represent these latest changes too.

* Add responses to GHA testing deps
  • Loading branch information
jiaweikho authored Jan 16, 2025
1 parent 632ad7a commit dd0695f
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov
pip install pytest pytest-cov responses
- name: Test with pytest
run: |
Expand Down
1 change: 1 addition & 0 deletions requirements-testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ pytest==8.3.4
pytest-cov==6.0.0
pytest-pretty==1.2.0
mypy==1.14.1
responses==0.25.6
ruff==0.9.1
types-requests~=2.32.0
60 changes: 60 additions & 0 deletions src/mkdocs_bibtex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import requests
import tempfile
import urllib.parse
from collections import OrderedDict
from functools import lru_cache
from itertools import groupby
Expand Down Expand Up @@ -290,6 +291,8 @@ def format_bibliography(citation_quads):

def tempfile_from_url(name, url, suffix):
log.debug(f"Downloading {name} from URL {url} to temporary file...")
if urllib.parse.urlparse(url).hostname == "api.zotero.org":
return tempfile_from_zotero_url(name, url, suffix)
for i in range(3):
try:
dl = requests.get(url)
Expand All @@ -309,3 +312,60 @@ def tempfile_from_url(name, url, suffix):
raise RuntimeError(
f"Couldn't successfully download the url: {url}"
) # pragma: no cover


def tempfile_from_zotero_url(name: str, url: str, suffix: str) -> str:
    """Download a (possibly paginated) bibfile from the Zotero API.

    The URL is first passed through ``sanitize_zotero_query``. Pages are then
    fetched one after another by following the ``next`` entry of
    ``response.links`` until a response carries no such entry or the page cap
    is reached. The page bodies are concatenated and written to a temporary
    file.

    Args:
        name: Label for the download, used only in log messages.
        url: Zotero API URL to download from.
        suffix: Suffix given to the temporary file.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so it
        is not removed when it is closed.

    Raises:
        RuntimeError: If a page answers with a status code other than 200, or
            if every attempt to request a page raises a ``requests`` error.
    """
    log.debug(f"Downloading {name} from Zotero at {url}")

    # Limit the pages requested to 999 arbitrarily. This will support a maximum of ~100k items
    max_pages = 999
    max_attempts = 3

    start_url = sanitize_zotero_query(url)
    page_url = start_url
    pages = []

    for _ in range(max_pages):
        # Reset per page so an exhausted retry loop can never fall through
        # with the previous page's response (or with no response at all).
        response = None
        for _attempt in range(max_attempts):
            try:
                response = requests.get(page_url)
            except requests.exceptions.RequestException:  # pragma: no cover
                continue
            break

        if response is None:  # pragma: no cover
            raise RuntimeError(f"Couldn't successfully download the url: {page_url}")
        if response.status_code != 200:
            msg = f"Couldn't download the url: {page_url}.\nStatus Code: {response.status_code}"
            raise RuntimeError(msg)

        pages.append(response.text)
        try:
            page_url = response.links["next"]["url"]
        except KeyError:
            # No "next" link: this was the last page.
            log.debug(f"Downloaded {len(pages)} page(s) from {start_url}")
            break
    else:
        # The cap was hit while a "next" link was still present, so the
        # downloaded bibliography is incomplete; make that visible.
        log.warning(f"Exceeded the maximum number of pages. Found: {len(pages)} pages")

    with tempfile.NamedTemporaryFile(mode="wt", encoding="utf-8", suffix=suffix, delete=False) as file:
        file.write("".join(pages))
    log.info(f"{name} downloaded from URL {start_url} to temporary file ({file.name})")
    return file.name


def sanitize_zotero_query(url: str) -> str:
    """Return ``url`` with the query params ``mkdocs-bibtex`` relies on enforced.

    Two params are always set, overriding whatever the caller supplied:

    - ``format=bibtex``: all bib data is expected to be in bibtex format.
    - ``limit=100``: requesting the maximum number of items per request
      reduces the number of requests needed, hence reducing load times.

    Every other query param is kept. When a key is repeated, only its last
    value survives.
    """
    url_parts = urllib.parse.urlparse(url)

    # Caller-supplied params first, then the forced ones so they win on conflict.
    merged_params = dict(urllib.parse.parse_qsl(url_parts.query))
    merged_params.update(format="bibtex", limit=100)

    rebuilt_query = urllib.parse.urlencode(query=merged_params)
    return url_parts._replace(query=rebuilt_query).geturl()
64 changes: 64 additions & 0 deletions test_files/test_plugin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
import collections.abc
import os
import random
import string

import pytest
import responses

from mkdocs_bibtex.plugin import BibTexPlugin

module_dir = os.path.dirname(os.path.abspath(__file__))
test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files"))
MOCK_ZOTERO_URL = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex"


@pytest.fixture
def mock_zotero_api(request: pytest.FixtureRequest) -> collections.abc.Generator[responses.RequestsMock]:
    """Register mocked, paginated Zotero API responses and yield the mock.

    ``request.param`` is the number of bibtex entries to generate. The entries
    are split into pages of 100; every page except the last one carries a
    ``Link`` header pointing at the next page's ``start`` offset.
    """
    page_size = 100
    base_url = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex&limit=100"

    entries = generate_bibtex_entries(request.param)
    chunks = [entries[offset : offset + page_size] for offset in range(0, len(entries), page_size)]
    last_index = len(chunks) - 1

    with responses.RequestsMock() as mock_api:
        for index, chunk in enumerate(chunks):
            # The first page is requested without a `start` param.
            page_url = base_url if index == 0 else f"{base_url}&start={index * page_size}"

            if index == last_index:
                link_headers = {}
            else:
                link_headers = {"Link": f"<{base_url}&start={(index + 1) * page_size}>; rel='next'"}

            # NOTE(review): `json=` presumably JSON-encodes the joined string
            # rather than sending it as plain text - confirm this is intended.
            mock_api.add(
                responses.Response(
                    method="GET",
                    url=page_url,
                    json="\n".join(chunk),
                    headers=link_headers,
                )
            )

        yield mock_api


@pytest.fixture
Expand Down Expand Up @@ -48,6 +79,17 @@ def test_bibtex_loading_bibdir():
assert len(plugin.bib_data.entries) == 2


@pytest.mark.parametrize(
    ("mock_zotero_api", "number_of_entries"),
    ((4, 4), (150, 150)),
    indirect=["mock_zotero_api"],
)
def test_bibtex_loading_zotero(mock_zotero_api: responses.RequestsMock, number_of_entries: int) -> None:
    """Load a bib file from the mocked Zotero URL and check the entry count.

    Parametrized with 4 and 150 entries, i.e. below and above the mock's page
    size of 100 entries.
    """
    zotero_plugin = BibTexPlugin()
    zotero_plugin.load_config(options={"bib_file": MOCK_ZOTERO_URL}, config_file_path=test_files_dir)
    zotero_plugin.on_config(zotero_plugin.config)

    loaded_entries = zotero_plugin.bib_data.entries
    assert len(loaded_entries) == number_of_entries

def test_on_page_markdown(plugin):
"""
This function just tests to make sure the rendered markdown changees with
Expand Down Expand Up @@ -106,3 +148,25 @@ def test_footnote_formatting_config(plugin):

with pytest.raises(Exception):
bad_plugin.on_config(bad_plugin.config)

def generate_bibtex_entries(n: int) -> list[str]:
    """Build ``n`` article-style bibtex snippets filled with random values.

    Each snippet gets the cite key ``<author_last>_<index>``, so keys are
    distinct within one call.
    """

    def random_letters(alphabet: str, length: int) -> str:
        """Return ``length`` characters drawn at random from ``alphabet``."""
        return "".join(random.choices(alphabet, k=length))

    generated = []
    for index in range(n):
        first_name = random_letters(string.ascii_letters, 8)
        last_name = random_letters(string.ascii_letters, 8)
        article_title = random_letters(string.ascii_letters, 10)
        journal_name = random_letters(string.ascii_uppercase, 5)
        pub_year = str(random.randint(1950, 2025))

        # NOTE(review): the brace opened after `@article` is closed right
        # after the cite key, before the field lines - confirm this is intended.
        snippet = f"""
@article{{{last_name}_{index}}},
title = {{{article_title}}},
volume = {{1}},
journal = {{{journal_name}}},
author = {{{last_name}, {first_name}}},
year = {{{pub_year}}},
"""
        generated.append(snippet)
    return generated
30 changes: 30 additions & 0 deletions test_files/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
format_simple,
format_pandoc,
extract_cite_keys,
sanitize_zotero_query,
)

from mkdocs_bibtex.plugin import parse_file
Expand Down Expand Up @@ -75,3 +76,32 @@ def test_extract_cite_key():
"""
assert extract_cite_keys("[@test]") == ["test"]
assert extract_cite_keys("[@test.3]") == ["test.3"]


EXAMPLE_ZOTERO_API_ENDPOINT = "https://api.zotero.org/groups/FOO/collections/BAR/items"


@pytest.mark.parametrize(
("zotero_url", "expected_sanitized_url"),
(
(f"{EXAMPLE_ZOTERO_API_ENDPOINT}", f"{EXAMPLE_ZOTERO_API_ENDPOINT}?format=bibtex&limit=100"),
(
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?format=bibtex&limit=25",
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?format=bibtex&limit=100",
),
(
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?format=json",
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?format=bibtex&limit=100",
),
(
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?sort=dateAdded",
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?sort=dateAdded&format=bibtex&limit=100",
),
(
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?sort=dateAdded&sort=publisher",
f"{EXAMPLE_ZOTERO_API_ENDPOINT}?sort=publisher&format=bibtex&limit=100",
),
),
)
def test_sanitize_zotero_query(zotero_url: str, expected_sanitized_url: str) -> None:
assert sanitize_zotero_query(url=zotero_url) == expected_sanitized_url

0 comments on commit dd0695f

Please sign in to comment.