diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index f3d05b8..3651289 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -39,7 +39,7 @@ jobs: matrix: os: [ubuntu-latest] python-version: [3.9, '3.10', 3.11, 3.12] - pandoc-version: [2.9.2, 2.14.0.3] + pandoc-version: [2.14.0.3, 3.6.2] runs-on: ${{ matrix.os }} steps: diff --git a/.gitignore b/.gitignore index 894a44c..f73d5e3 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +# example +example/site \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32a1ac6..8e8e120 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,15 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace - - repo: https://github.com/ambv/black - rev: 22.12.0 - hooks: - - id: black - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.207' - hooks: - - id: ruff + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 'v0.9.2' + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format diff --git a/README.md b/README.md index 97b1692..62610f5 100644 --- a/README.md +++ b/README.md @@ -40,44 +40,90 @@ The footnotes extension is how citations are linked for now. - `bib_by_default` - Automatically append the `bib_command` at the end of every markdown document, defaults to `true` - `full_bib_command` - The syntax to render your entire bibliography, defaults to `\full_bibliography` - `csl_file` - The path or url to a bibtex CSL file, specifying your citation format. Defaults to `None`, which renders in a plain format. 
A registry of citation styles can be found here: https://github.com/citation-style-language/styles -- `cite_inline` - Whether or not to render citations inline, requires `csl_file` to be specified. Defaults to `False`. ## Usage In your markdown files: -1. Add your citations as you would if you used pandoc, IE: `[@first_cite;@second_cite]` +1. Add your citations as you would if you used pandoc, IE: `[@first_cite;@second_cite]`. 2. Add `\bibliography`, or the value of `bib_command`, to the doc you want your references rendered (if `bib_by_default` is set to true this is automatically applied for every page). 3. (Optional) Add `\full_bibliography`, or the value of `full_bib_command`, to where you want the full bibliography rendered. *Note*: This is currently not working properly, since this plugin can't dictate the order in which files are processed. The best way to ensure the file with the full bibliography gets processed last is to use numbers in front of file/folder names to enforce the order of processing, IE: `01_my_first_file.md` -4. (Optional) Configure the `csl_file` option to dictate the citation text formatting. +4. (Optional) Configure the `csl_file` option to dictate the citation text formatting. This plugin automatically detects if the citation is an inline style and inserts that text when appropriate. ## Debugging +You can run mkdocs with the `--strict` flag to fail building on any citations that don't exist in the bibtex file. + You may wish to use the verbose flag in mkdocs (`-v`) to log debug messages. You should see something like this ```bash (...) -DEBUG - Parsing bibtex file 'docs/bib/papers.bib'... -INFO - SUCCESS Parsing bibtex file 'docs/bib/papers.bib' -DEBUG - Downloading CSL file from URL https://raw.githubusercontent.com/citation-style-language/styles/master/apa-6th-edition.csl to temporary file... 
-INFO - CSL file downladed from URL https://raw.githubusercontent.com/citation-style-language/styles/master/apa-6th-edition.csl to temporary file () +DEBUG - Reading markdown pages. +DEBUG - Reading: index.md +DEBUG - Running `page_markdown` event from plugin 'bibtex' +WARNING - Citing unknown reference key nonexistent +DEBUG - Converting with pandoc: +DEBUG - --- + link-citations: false + --- + + 0. [@test] + + 1. [@nonexistent] + + 2. [@test, see pp. 100] + + 3. [see @test, pp. 100, 200] + + # References + +[WARNING] Citeproc: citation nonexistent not found + +DEBUG - Pandoc output: +DEBUG - 0. ^1^ + + 1. ^**nonexistent?**^ + + 2. ^1,\ see\ pp. 100^ + + 3. ^see\ 1^ + + # References {#references .unnumbered} + + :::: {#refs .references .csl-bib-body entry-spacing="0" line-spacing="2"} + ::: {#ref-test .csl-entry} + [1. ]{.csl-left-margin}[Author, F. & Author, S. Test title. *Testing + Journal* **1**, (2019).]{.csl-right-inline} + ::: + :::: +DEBUG - Inline cache: {'[@test]': '^1^', '[@nonexistent]': '^**nonexistent?**^', '[@test, see pp. 100]': '^1,\\ see\\ pp. 100^', '[see @test, pp. 100, 200]': '^see\\ 1^'} +DEBUG - Reference cache: {'test': 'Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019).'} +WARNING - Error formatting citation nonexistent: 'nonexistent' +DEBUG - Markdown: + # This is an example of how to use the mkdocs-bibtex plugin + + ## Citation + + Citation [^test] + + ## Non existing citation + + This should fail on --strict mode + + Citation + + ## Citation with affix + + Citation [^test] + + ## Citation with multiple affixes + + Citation [^test] + + + ## Bibliography + + [^test]: Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019). +DEBUG - Reading: full_bib.md (...) -DEBUG - Reading: publications.md -DEBUG - Running 2 `page_markdown` events -DEBUG - Formatting all bib entries... 
-DEBUG - --Converting bibtex entry 'foo2019' with CSL file 'docs/bib/apa_verbose.csl' using pandoc>=2.11 -DEBUG - --SUCCESS Converting bibtex entry 'foo2019' with CSL file 'docs/bib/apa_verbose.csl' using pandoc>=2.11 -DEBUG - --Converting bibtex entry 'bar2024' with CSL file 'docs/bib/apa_verbose.csl' using pandoc>=2.11 -DEBUG - --SUCCESS Converting bibtex entry 'bar2024' with CSL file 'docs/bib/apa_verbose.csl' using pandoc>=2.11 -INFO - SUCCESS Formatting all bib entries -DEBUG - Replacing citation keys with the generated ones... -DEBUG - --Rendering citation inline for '[@foo2019]'... -DEBUG - ----Converting pandoc citation key '[@foo2019]' with CSL file 'docs/bib/apa_verbose.csl' and Bibliography file '(...)/tmpzt7t8p0y/temp.bib'... -DEBUG - ----SUCCESS Converting pandoc citation key '[@foo2019]' with CSL file 'docs/bib/apa_verbose.csl' and Bibliography file '(...)/tmpzt7t8p0y/temp.bib' -DEBUG - --SUCCESS Rendering citation inline for '[@foo2019]' -DEBUG - --Rendering citation inline for '[@bar2024]'... -DEBUG - ----Converting pandoc citation key '[@bar2024]' with CSL file 'docs/bib/apa_verbose.csl' and Bibliography file '(...)/tmpzt7t8p0y/temp.bib'... 
-DEBUG - ----SUCCESS Converting pandoc citation key '[@bar2024]' with CSL file 'docs/bib/apa_verbose.csl' and Bibliography file '(...)/tmpzt7t8p0y/temp.bib' -DEBUG - --SUCCESS Rendering citation inline for '[@bar2024]' -DEBUG - SUCCESS Replacing citation keys with the generated ones ``` diff --git a/example/docs/full_bib.md b/example/docs/full_bib.md new file mode 100644 index 0000000..5070fe3 --- /dev/null +++ b/example/docs/full_bib.md @@ -0,0 +1,3 @@ +# This is a full bibliography + +\full_bibliography diff --git a/example/docs/index.md b/example/docs/index.md new file mode 100644 index 0000000..6873766 --- /dev/null +++ b/example/docs/index.md @@ -0,0 +1,24 @@ +# This is an example of how to use the mkdocs-bibtex plugin + +## Citation + +Citation [@test] + +## Non existing citation + +This should fail on --strict mode + +Citation [@nonexistent] + +## Citation with affix + +Citation [@test, see pp. 100] + +## Citation with multiple affixes + +Citation [see @test, pp. 100, 200] + + +## Bibliography + +\bibliography diff --git a/example/mkdocs.yml b/example/mkdocs.yml new file mode 100644 index 0000000..46df12f --- /dev/null +++ b/example/mkdocs.yml @@ -0,0 +1,15 @@ +site_name: Example Mkdocs-bibtex + +plugins: + - bibtex: + bib_file: refs.bib + csl_file: nature.csl + #csl_file: springer-basic-author-date.csl + +markdown_extensions: + - footnotes + - pymdownx.caret + +nav: + - Index: index.md + - Bibliography: full_bib.md \ No newline at end of file diff --git a/example/nature.csl b/example/nature.csl new file mode 100644 index 0000000..2646cfe --- /dev/null +++ b/example/nature.csl @@ -0,0 +1,132 @@ + + diff --git a/example/refs.bib b/example/refs.bib new file mode 100644 index 0000000..f5d1a96 --- /dev/null +++ b/example/refs.bib @@ -0,0 +1,41 @@ +@article{test, + title={Test Title}, + author={Author, First and Author, Second}, + journal={Testing Journal}, + volume={1}, + year={2019}, + publisher={Test_Publisher} +} + +@article{test2, + title={{Test Title 
(TT)}}, + author={Author, First and Author, Second}, + journal={Testing Journal (TJ)}, + volume={1}, + year={2019}, + publisher={Test_Publisher (TP)} +} + +@article{Bivort2016, + title = {Evidence for Selective Attention in the Insect Brain}, + author = {De Bivort, Benjamin L. and Van Swinderen, Bruno}, + year = {2016}, + volume = {15}, + pages = {1--7}, + issn = {22145753}, + doi = {10.1016/j.cois.2016.02.007}, + abstract = {The capacity for selective attention appears to be required by any animal responding to an environment containing multiple objects, although this has been difficult to study in smaller animals such as insects. Clear operational characteristics of attention however make study of this crucial brain function accessible to any animal model. Whereas earlier approaches have relied on freely behaving paradigms placed in an ecologically relevant context, recent tethered preparations have focused on brain imaging and electrophysiology in virtual reality environments. Insight into brain activity during attention-like behavior has revealed key elements of attention in the insect brain. 
Surprisingly, a variety of brain structures appear to be involved, suggesting that even in the smallest brains attention might involve widespread coordination of neural activity.}, + journal = {Current Opinion in Insect Science}, + keywords = {attention,bees,drosophila,insects}, + pmid = {27436727} +} + +@article{test_citavi, + title={{Test Title (TT)}}, + author={Author, First and Author, Second}, + journal={Testing Journal (TJ)}, + volume={1}, + year={2019}, + publisher={Test_Publisher (TP)}, + url = {\url{https://doi.org/10.21577/0103-5053.20190253}} +} diff --git a/example/springer-basic-author-date.csl b/example/springer-basic-author-date.csl new file mode 100644 index 0000000..5edf631 --- /dev/null +++ b/example/springer-basic-author-date.csl @@ -0,0 +1,239 @@ + + diff --git a/pyproject.toml b/pyproject.toml index 05ba743..0ce959e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,39 @@ +[build-system] +requires = ["setuptools>=68.0.0", "setuptools_scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "mkdocs-bibtex" +dynamic = ["version"] +description = "An MkDocs plugin that enables managing citations with BibTex" +readme = "README.md" +requires-python = ">=3.6" +license = {text = "BSD-3-Clause-LBNL"} +keywords = ["mkdocs", "python", "markdown", "bibtex"] +authors = [ + {name = "Shyam Dwaraknath", email = "16827130+shyamd@users.noreply.github.com"}, +] +dependencies = [ + "mkdocs>=1.2", + "pybtex>=0.22", + "pypandoc>=1.5", + "requests>=2.8.1", + "validators>=0.19.0", + "setuptools>=68.0.0", + "responses>=0.25.6", +] + +[project.urls] +Homepage = "https://github.com/shyamd/mkdocs-bibtex/" + +[project.entry-points."mkdocs.plugins"] +bibtex = "mkdocs_bibtex.plugin:BibTexPlugin" + +[tool.setuptools] +package-dir = {"" = "src"} +packages = ["mkdocs_bibtex"] + + [tool.ruff] line-length = 120 exclude = [ @@ -6,6 +42,8 @@ exclude = [ '__init__.py', ] +[tool.setuptools_scm] + [tool.ruff.lint] ignore = [ 'E741', diff --git 
a/requirements-testing.txt b/requirements-testing.txt index f70023d..bc97659 100644 --- a/requirements-testing.txt +++ b/requirements-testing.txt @@ -2,6 +2,5 @@ pytest==8.3.4 pytest-cov==6.0.0 pytest-pretty==1.2.0 mypy==1.14.1 -responses==0.25.6 ruff==0.9.1 types-requests~=2.32.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a5fcfd3..e7c646f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pybtex==0.24.0 pypandoc==1.14 requests==2.32.3 validators==0.34.0 +responses==0.25.6 \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index b40f911..0000000 --- a/setup.py +++ /dev/null @@ -1,31 +0,0 @@ -from setuptools import find_packages, setup - -with open("README.md", "r") as fh: - long_description = fh.read() - -setup( - name="mkdocs-bibtex", - use_scm_version=True, - setup_requires=["setuptools_scm"], - description="An MkDocs plugin that enables managing citations with BibTex", - long_description=long_description, - long_description_content_type="text/markdown", - keywords="mkdocs python markdown bibtex", - url="https://github.com/shyamd/mkdocs-bibtex/", - author="Shyam Dwaraknath", - author_email="shyamd@lbl.gov", - license="BSD-3-Clause-LBNL", - python_requires=">=3.6", - install_requires=[ - "mkdocs>=1", - "pybtex>=0.22", - "pypandoc>=1.5", - "requests>=2.8.1", - "validators>=0.19.0", - "setuptools>=68.0.0" - ], - tests_require=["pytest"], - packages=find_packages("src"), - package_dir={"": "src"}, - entry_points={"mkdocs.plugins": ["bibtex = mkdocs_bibtex.plugin:BibTexPlugin"]}, -) diff --git a/src/mkdocs_bibtex/citation.py b/src/mkdocs_bibtex/citation.py new file mode 100644 index 0000000..de26016 --- /dev/null +++ b/src/mkdocs_bibtex/citation.py @@ -0,0 +1,75 @@ +from dataclasses import dataclass +from typing import List +import re + + +CITATION_REGEX = re.compile(r"(?:(?P<prefix>[^@;]*?)\s*)?@(?P<key>[\w-]+)(?:,\s*(?P<suffix>[^;]+))?") +CITATION_BLOCK_REGEX = re.compile(r"\[(.*?)\]") 
+EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") + + +@dataclass +class Citation: + """Represents a citation in raw markdown without formatting""" + + key: str + prefix: str = "" + suffix: str = "" + + def __str__(self) -> str: + """String representation of the citation""" + parts = [] + if self.prefix: + parts.append(self.prefix) + parts.append(f"@{self.key}") + if self.suffix: + parts.append(self.suffix) + return " ".join(parts) + + @classmethod + def from_markdown(cls, markdown: str) -> List["Citation"]: + """Extracts citations from a markdown string""" + citations = [] + + pos_citations = markdown.split(";") + pos_citations = [citation for citation in pos_citations if EMAIL_REGEX.match(citation) is None] + + for citation in pos_citations: + match = CITATION_REGEX.match(citation) + + if match: + result = {group: (match.group(group) or "") for group in ["prefix", "key", "suffix"]} + citations.append(Citation(prefix=result["prefix"], key=result["key"], suffix=result["suffix"])) + return citations + + +@dataclass +class CitationBlock: + citations: List[Citation] + raw: str = "" + + def __str__(self) -> str: + """String representation of the citation block""" + if self.raw != "": + return f"[{self.raw}]" + return "[" + "; ".join(str(citation) for citation in self.citations) + "]" + + @classmethod + def from_markdown(cls, markdown: str) -> List["CitationBlock"]: + """Extracts citation blocks from a markdown string""" + """ + Given a markdown string + 1. Find all cite blocks by looking for square brackets + 2. 
For each cite block, try to extract the citations + - if this errors there are no citations in this block and we move on + - if this succeeds we have a list of citations + """ + citation_blocks = [] + for match in CITATION_BLOCK_REGEX.finditer(markdown): + try: + citation_blocks.append( + CitationBlock(raw=match.group(1), citations=Citation.from_markdown(match.group(1))) + ) + except Exception as e: + print(f"Error extracting citations from block: {e}") + return citation_blocks diff --git a/src/mkdocs_bibtex/config.py b/src/mkdocs_bibtex/config.py new file mode 100644 index 0000000..c491a6e --- /dev/null +++ b/src/mkdocs_bibtex/config.py @@ -0,0 +1,32 @@ +# 3rd party imports +from mkdocs.config import base, config_options + + +class BibTexConfig(base.Config): + """Configuration of the BibTex plugin for mkdocs. + + Options: + bib_file (string): path or url to a single bibtex file for entries, + url example: https://api.zotero.org/*/items?format=bibtex + bib_dir (string): path to a directory of bibtex files for entries + bib_command (string): command to place a bibliography relevant to just that file + defaults to \bibliography + bib_by_default (bool): automatically appends bib_command to markdown pages + by default, defaults to true + full_bib_command (string): command to place a full bibliography of all references + csl_file (string, optional): path or url to a CSL file, relative to mkdocs.yml. 
+ footnote_format (string): format for the footnote key, defaults to "{key}" + """ + + # Input files + bib_file = config_options.Optional(config_options.Type(str)) + bib_dir = config_options.Optional(config_options.Dir(exists=True)) + csl_file = config_options.Optional(config_options.Type(str)) + + # Commands + bib_command = config_options.Type(str, default="\\bibliography") + full_bib_command = config_options.Type(str, default="\\full_bibliography") + + # Settings + bib_by_default = config_options.Type(bool, default=True) + footnote_format = config_options.Type(str, default="{key}") diff --git a/src/mkdocs_bibtex/plugin.py b/src/mkdocs_bibtex/plugin.py index b2bfe90..09d3898 100644 --- a/src/mkdocs_bibtex/plugin.py +++ b/src/mkdocs_bibtex/plugin.py @@ -1,61 +1,36 @@ -import re import time import validators from collections import OrderedDict from pathlib import Path -from mkdocs.config import config_options from mkdocs.plugins import BasePlugin -from pybtex.database import BibliographyData, parse_file + +from mkdocs_bibtex.citation import CitationBlock, Citation + +from mkdocs_bibtex.config import BibTexConfig +from mkdocs_bibtex.registry import SimpleRegistry, PandocRegistry +from mkdocs.exceptions import ConfigurationError + from mkdocs_bibtex.utils import ( - find_cite_blocks, - extract_cite_keys, - format_bibliography, - format_pandoc, - format_simple, - insert_citation_keys, tempfile_from_url, log, ) -class BibTexPlugin(BasePlugin): +class BibTexPlugin(BasePlugin[BibTexConfig]): """ Allows the use of bibtex in markdown content for MKDocs. 
- - Options: - bib_file (string): path or url to a single bibtex file for entries, - url example: https://api.zotero.org/*/items?format=bibtex - bib_dir (string): path to a directory of bibtex files for entries - bib_command (string): command to place a bibliography relevant to just that file - defaults to \bibliography - bib_by_default (bool): automatically appends bib_command to markdown pages - by default, defaults to true - full_bib_command (string): command to place a full bibliography of all references - csl_file (string, optional): path or url to a CSL file, relative to mkdocs.yml. - cite_inline (bool): Whether or not to render inline citations, requires CSL, defaults to false """ - config_scheme = [ - ("bib_file", config_options.Type(str, required=False)), - ("bib_dir", config_options.Dir(exists=True, required=False)), - ("bib_command", config_options.Type(str, default="\\bibliography")), - ("bib_by_default", config_options.Type(bool, default=True)), - ("full_bib_command", config_options.Type(str, default="\\full_bibliography")), - ("csl_file", config_options.Type(str, default="")), - ("cite_inline", config_options.Type(bool, default=False)), - ("footnote_format", config_options.Type(str, default="{number}")), - ] - def __init__(self): self.bib_data = None self.all_references = OrderedDict() - self.unescape_for_arithmatex = False - self.configured = False + self.last_configured = None + self.registry = None def on_startup(self, *, command, dirty): - """ Having on_startup() tells mkdocs to keep the plugin object upon rebuilds""" + """Having on_startup() tells mkdocs to keep the plugin object upon rebuilds""" pass def on_config(self, config): @@ -66,28 +41,20 @@ def on_config(self, config): bibfiles = [] # Set bib_file from either url or path - if self.config.get("bib_file", None) is not None: - is_url = validators.url(self.config["bib_file"]) + if self.config.bib_file is not None: + is_url = validators.url(self.config.bib_file) # if bib_file is a valid URL, 
cache it with tempfile if is_url: - bibfiles.append(tempfile_from_url("bib file", self.config["bib_file"], ".bib")) + bibfiles.append(tempfile_from_url("bib file", self.config.bib_file, ".bib")) else: - bibfiles.append(self.config["bib_file"]) - elif self.config.get("bib_dir", None) is not None: - bibfiles.extend(Path(self.config["bib_dir"]).rglob("*.bib")) + bibfiles.append(self.config.bib_file) + elif self.config.bib_dir is not None: + bibfiles.extend(Path(self.config.bib_dir).rglob("*.bib")) else: # pragma: no cover - raise Exception("Must supply a bibtex file or directory for bibtex files") - - # load bibliography data - refs = {} - log.info(f"Loading data from bib files: {bibfiles}") - for bibfile in bibfiles: - log.debug(f"Parsing bibtex file {bibfile}") - bibdata = parse_file(bibfile) - refs.update(bibdata.entries) + raise ConfigurationError("Must supply a bibtex file or directory for bibtex files") - if hasattr(self,"last_configured"): - # Skip rebuilding bib data if all files are older than the initial config + # Skip rebuilding bib data if all files are older than the initial config + if self.last_configured is not None: if all(Path(bibfile).stat().st_mtime < self.last_configured for bibfile in bibfiles): log.info("BibTexPlugin: No changes in bibfiles.") return config @@ -95,25 +62,21 @@ def on_config(self, config): # Clear references on reconfig self.all_references = OrderedDict() - self.bib_data = BibliographyData(entries=refs) - self.bib_data_bibtex = self.bib_data.to_string("bibtex") - # Set CSL from either url or path (or empty) - is_url = validators.url(self.config["csl_file"]) - if is_url: - self.csl_file = tempfile_from_url("CSL file", self.config["csl_file"], ".csl") + if self.config.csl_file is not None and validators.url(self.config.csl_file): + self.csl_file = tempfile_from_url("CSL file", self.config.csl_file, ".csl") else: - self.csl_file = self.config.get("csl_file", None) - - # Toggle whether or not to render citations inline (Requires 
CSL) - self.cite_inline = self.config.get("cite_inline", False) - if self.cite_inline and not self.csl_file: # pragma: no cover - raise Exception("Must supply a CSL file in order to use cite_inline") + self.csl_file = self.config.csl_file - if "{number}" not in self.config.get("footnote_format"): - raise Exception("Must include `{number}` placeholder in footnote_format") + if "{key}" not in self.config.footnote_format: + raise ConfigurationError("Must include `{key}` placeholder in footnote_format") - self.footnote_format = self.config.get("footnote_format") + if self.csl_file: + self.registry = PandocRegistry( + bib_files=bibfiles, csl_file=self.csl_file, footnote_format=self.config.footnote_format + ) + else: + self.registry = SimpleRegistry(bib_files=bibfiles, footnote_format=self.config.footnote_format) self.last_configured = time.time() return config @@ -134,125 +97,59 @@ def on_page_markdown(self, markdown, page, config, files): 5. Insert the full bibliograph into the markdown """ - # 1. Grab all the cited keys in the markdown - cite_keys = find_cite_blocks(markdown) + # 1. Find all cite blocks in the markdown + cite_blocks = CitationBlock.from_markdown(markdown) - # 2. Convert all the citations to text references - citation_quads = self.format_citations(cite_keys) + # 2. Validate the cite blocks + self.registry.validate_citation_blocks(cite_blocks) - # 3. Convert cited keys to citation, - # or a footnote reference if inline_cite is false. - if self.cite_inline: - markdown = insert_citation_keys( - citation_quads, - markdown, - self.csl_file, - self.bib_data_bibtex, - ) - else: - markdown = insert_citation_keys(citation_quads, markdown) + # 3. Replace the cite blocks with the inline citations + for block in cite_blocks: + replacement = self.registry.inline_text(block) + markdown = markdown.replace(str(block), replacement) - # 4. Insert in the bibliopgrahy text into the markdown - bib_command = self.config.get("bib_command", "\\bibliography") + # 4a. 
Ensure we have a bibliography if desired + bib_command = self.config.bib_command - if self.config.get("bib_by_default"): + if self.config.bib_by_default and markdown.count(bib_command) == 0: markdown += f"\n{bib_command}" - bibliography = format_bibliography(citation_quads) - markdown = re.sub( - re.escape(bib_command), - bibliography, - markdown, - ) + # 4. Insert the bibliography text into the markdown + citations = OrderedDict() + for block in cite_blocks: + for citation in block.citations: + citations[citation.key] = citation + + bibliography = [] + for citation in citations.values(): + try: + bibliography.append( + "[^{}]: {}".format( + self.registry.footnote_format.format(key=citation.key), self.registry.reference_text(citation) + ) + ) + except Exception as e: + log.warning(f"Error formatting citation {citation.key}: {e}") + bibliography = "\n".join(bibliography) + markdown = markdown.replace(bib_command, bibliography) # 5. Build the full Bibliography and insert into the text - full_bib_command = self.config.get("full_bib_command", "\\full_bibliography") - - markdown = re.sub( - re.escape(full_bib_command), - self.full_bibliography, - markdown, - ) + full_bib_command = self.config.full_bib_command + if markdown.count(full_bib_command) > 0: + log.info("Building full bibliography") + all_citations = [Citation(key=key) for key in self.registry.bib_data.entries] + blocks = [CitationBlock(citations=[cite]) for cite in all_citations] + self.registry.validate_citation_blocks(blocks) + full_bibliography = [] + for citation in all_citations: + full_bibliography.append( + "[^{}]: {}".format( + self.registry.footnote_format.format(key=citation.key), self.registry.reference_text(citation) + ) + ) + full_bibliography = "\n".join(full_bibliography) + markdown = markdown.replace(full_bib_command, full_bibliography) + + log.debug(f"Markdown: \n{markdown}") return markdown - - def format_footnote_key(self, number): - """ - Create footnote key based on footnote_format - - 
Args: - number (int): citation number - - Returns: - formatted footnote - """ - return self.footnote_format.format(number=number) - - def format_citations(self, cite_keys): - """ - Formats references into citation quads and adds them to the global registry - - Args: - cite_keys (list): List of full cite_keys that maybe compound keys - - Returns: - citation_quads: quad tuples of the citation inforamtion - """ - - # Deal with arithmatex fix at some point - - # 1. Extract the keys from the keyset - entries = OrderedDict() - pairs = [ - [cite_block, key] - for cite_block in cite_keys - for key in extract_cite_keys(cite_block) - ] - keys = list(OrderedDict.fromkeys([k for _, k in pairs]).keys()) - numbers = {k: str(n + 1) for n, k in enumerate(keys)} - - # Remove non-existant keys from pairs - pairs = [p for p in pairs if p[1] in self.bib_data.entries] - - # 2. Collect any unformatted reference keys - for _, key in pairs: - if key not in self.all_references: - entries[key] = self.bib_data.entries[key] - - # 3. Format entries - log.debug("Formatting all bib entries...") - if self.csl_file: - self.all_references.update(format_pandoc(entries, self.csl_file)) - else: - self.all_references.update(format_simple(entries)) - log.debug("SUCCESS Formatting all bib entries") - - # 4. 
Construct quads - quads = [ - ( - cite_block, - key, - self.format_footnote_key(numbers[key]), - self.all_references[key], - ) - for cite_block, key in pairs - ] - - # List the quads in order to remove duplicate entries - return list(dict.fromkeys(quads)) - - @property - def full_bibliography(self): - """ - Returns the full bibliography text - """ - - bibliography = [] - for number, (key, citation) in enumerate(self.all_references.items()): - bibliography_text = "[^{}]: {}".format( - number, - citation, - ) - bibliography.append(bibliography_text) - - return "\n".join(bibliography) diff --git a/src/mkdocs_bibtex/registry.py b/src/mkdocs_bibtex/registry.py new file mode 100644 index 0000000..5d8ae49 --- /dev/null +++ b/src/mkdocs_bibtex/registry.py @@ -0,0 +1,221 @@ +from abc import ABC, abstractmethod +from mkdocs_bibtex.citation import Citation, CitationBlock +from mkdocs_bibtex.utils import log +from pybtex.database import BibliographyData, parse_file +from pybtex.backends.markdown import Backend as MarkdownBackend +from pybtex.style.formatting.plain import Style as PlainStyle +import pypandoc +import tempfile +import re +from pathlib import Path + + +class ReferenceRegistry(ABC): + """ + A registry of references that can be used to format citations + """ + + def __init__(self, bib_files: list[str], footnote_format: str = "{key}"): + refs = {} + log.info(f"Loading data from bib files: {bib_files}") + for bibfile in bib_files: + log.debug(f"Parsing bibtex file {bibfile}") + bibdata = parse_file(bibfile) + refs.update(bibdata.entries) + self.bib_data = BibliographyData(entries=refs) + self.footnote_format = footnote_format + + @abstractmethod + def validate_citation_blocks(self, citation_blocks: list[CitationBlock]) -> None: + """Validates all citation blocks. 
Throws an error if any citation block is invalid""" + + @abstractmethod + def inline_text(self, citation_block: CitationBlock) -> str: + """Retrieves the inline citation text for a citation block""" + + @abstractmethod + def reference_text(self, citation: Citation) -> str: + """Retrieves the reference text for a citation""" + + +class SimpleRegistry(ReferenceRegistry): + def __init__(self, bib_files: list[str], footnote_format: str = "{key}"): + super().__init__(bib_files, footnote_format) + self.style = PlainStyle() + self.backend = MarkdownBackend() + + def validate_citation_blocks(self, citation_blocks: list[CitationBlock]) -> None: + """Validates all citation blocks. Logs a warning for each unknown key and each unsupported affix""" + for citation_block in citation_blocks: + for citation in citation_block.citations: + if citation.key not in self.bib_data.entries: + log.warning(f"Citing unknown reference key {citation.key}") + + for citation_block in citation_blocks: + for citation in citation_block.citations: + if citation.prefix != "" or citation.suffix != "": + log.warning(f"Affixes not supported in simple mode: {citation}") + + def inline_text(self, citation_block: CitationBlock) -> str: + keys = [ + self.footnote_format.format(key=citation.key) + for citation in citation_block.citations + if citation.key in self.bib_data.entries + ] + return "".join(f"[^{key}]" for key in keys) + + def reference_text(self, citation: Citation) -> str: + entry = self.bib_data.entries[citation.key] + log.debug(f"Converting bibtex entry {citation.key!r} without pandoc") + formatted_entry = self.style.format_entry("", entry) + entry_text = formatted_entry.text.render(self.backend) + entry_text = entry_text.replace("\n", " ") + # Clean up some common escape sequences + entry_text = entry_text.replace("\\(", "(").replace("\\)", ")").replace("\\.", ".") + log.debug(f"SUCCESS Converting bibtex entry {citation.key!r} without pandoc") + return entry_text + + +class PandocRegistry(ReferenceRegistry): 
+ """A registry that uses Pandoc to format citations""" + + def __init__(self, bib_files: list[str], csl_file: str, footnote_format: str = "{key}"): + super().__init__(bib_files, footnote_format) + self.csl_file = csl_file + + # Get pandoc version for formatting decisions + pandoc_version = tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) + if not pandoc_version >= (2, 11): + raise ValueError("Pandoc version 2.11 or higher is required for this registry") + + # Cache for formatted citations + self._inline_cache: dict[str, str] = {} + self._reference_cache: dict[str, str] = {} + self._is_inline = self._check_csl_type(self.csl_file) + + def inline_text(self, citation_block: CitationBlock) -> str: + """Get the inline text for a citation block""" + footnotes = " ".join( + f"[^{self.footnote_format.format(key=citation.key)}]" + for citation in citation_block.citations + if citation.key in self._reference_cache + ) + + if self._is_inline: + # For inline styles, return both inline citation and footnote + inline_text = self._inline_cache.get(str(citation_block), str(citation_block)) + return inline_text + footnotes + else: + # For footnote styles, just return footnote links + return footnotes + + def reference_text(self, citation: Citation) -> str: + """Returns cached reference text""" + return self._reference_cache[citation.key] + + def validate_citation_blocks(self, citation_blocks: list[CitationBlock]) -> None: + """Validates citation blocks and pre-formats all citations""" + # First validate all keys exist + for citation_block in citation_blocks: + for citation in citation_block.citations: + if citation.key not in self.bib_data.entries: + log.warning(f"Citing unknown reference key {citation.key}") + + # Pre-Process with appropriate pandoc version + self._inline_cache, self._reference_cache = self._process_with_pandoc(citation_blocks) + + @property + def bib_data_bibtex(self) -> str: + """Convert bibliography data to BibTeX format""" + return 
self.bib_data.to_string("bibtex") + + def _process_with_pandoc(self, citation_blocks: list[CitationBlock]) -> tuple[dict, dict]: + """Process citations with pandoc""" + + # Build the document pandoc can process and we can parse to extract inline citations and reference text + full_doc = """ +--- +link-citations: false +--- + +""" + citation_map = {index: block for index, block in enumerate(citation_blocks)} + full_doc += "\n\n".join(f"{index}. {block}" for index, block in citation_map.items()) + full_doc += "\n\n# References\n\n" + log.debug("Converting with pandoc:") + log.debug(full_doc) + with tempfile.TemporaryDirectory() as tmpdir: + bib_path = Path(tmpdir).joinpath("temp.bib") + with open(bib_path, "wt", encoding="utf-8") as bibfile: + bibfile.write(self.bib_data_bibtex) + + args = ["--citeproc", "--bibliography", str(bib_path), "--csl", self.csl_file] + markdown = pypandoc.convert_text( + source=full_doc, to="markdown-citations", format="markdown", extra_args=args + ) + + log.debug("Pandoc output:") + log.debug(markdown) + try: + splits = markdown.split("# References") + inline_citations, references = splits[0], splits[1] + except IndexError: + raise ValueError("Failed to parse pandoc output") + + # Parse inline citations + inline_citations = inline_citations.strip() + + # Use regex to match numbered entries, handling multi-line citations + citation_pattern = re.compile(r"(\d+)\.\s+(.*?)(?=(?:\n\d+\.|$))", re.DOTALL) + matches = citation_pattern.finditer(inline_citations) + + # Create a dictionary of cleaned citations (removing extra whitespace and newlines) + inline_citations = {int(match.group(1)): " ".join(match.group(2).split()) for match in matches} + + inline_cache = {str(citation_map[index]): citation for index, citation in inline_citations.items()} + + # Parse references + reference_cache = {} + + # Pattern for format with .csl-left-margin and .csl-right-inline + pattern1 = r"::: \{#ref-(?P[^\s]+) 
.csl-entry\}\n\[.*?\]\{\.csl-left-margin\}\[(?P.*?)\]\{\.csl-right-inline\}" # noqa: E501 + + # Pattern for simple reference format + pattern2 = r"::: \{#ref-(?P[^\s]+) .csl-entry\}\n(?P.*?)(?=:::|$)" + + # Try first pattern + matches1 = re.finditer(pattern1, references, re.DOTALL) + for match in matches1: + key = match.group("key").strip() + citation = match.group("citation").replace("\n", " ").strip() + reference_cache[key] = citation + + # If no matches found, try second pattern + if not reference_cache: + matches2 = re.finditer(pattern2, references, re.DOTALL) + for match in matches2: + key = match.group("key").strip() + citation = match.group("citation").replace("\n", " ").strip() + reference_cache[key] = citation + + log.debug(f"Inline cache: {inline_cache}") + log.debug(f"Reference cache: {reference_cache}") + return inline_cache, reference_cache + + def _check_csl_type(self, csl_file: str) -> bool: + """Check if CSL file is footnote or inline style""" + if not csl_file: + return False + + try: + with open(csl_file) as f: + csl_content = f.read() + # Check if citation-format is "author-date" + # For "numeric" styles we default to footnotes + if 'citation-format="author-date"' in csl_content: + return True + # Default to footnote style + return False + except Exception as e: + log.warning(f"Error reading CSL file: {e}") + return False diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index 174bc64..e15b0e2 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -1,295 +1,15 @@ import logging -import re import requests import tempfile import urllib.parse -from collections import OrderedDict -from functools import lru_cache -from itertools import groupby -from pathlib import Path -from packaging.version import Version -import mkdocs -import pypandoc - -from pybtex.backends.markdown import Backend as MarkdownBackend -from pybtex.database import BibliographyData -from pybtex.style.formatting.plain import Style as PlainStyle 
# Grab a logger log = logging.getLogger("mkdocs.plugins.mkdocs-bibtex") -# Add the warning filter only if the version is lower than 1.2 -# Filter doesn't do anything since that version -MKDOCS_LOG_VERSION = '1.2' -if Version(mkdocs.__version__) < Version(MKDOCS_LOG_VERSION): - from mkdocs.utils import warning_filter - log.addFilter(warning_filter) - - -def format_simple(entries): - """ - Format the entries using a simple built in style - - Args: - entries (dict): dictionary of entries - - Returns: - references (dict): dictionary of citation texts - """ - style = PlainStyle() - backend = MarkdownBackend() - citations = OrderedDict() - for key, entry in entries.items(): - log.debug(f"Converting bibtex entry {key!r} without pandoc") - formatted_entry = style.format_entry("", entry) - entry_text = formatted_entry.text.render(backend) - entry_text = entry_text.replace("\n", " ") - # Local reference list for this file - citations[key] = ( - entry_text.replace("\\(", "(").replace("\\)", ")").replace("\\.", ".") - ) - log.debug(f"SUCCESS Converting bibtex entry {key!r} without pandoc") - return citations - - -def format_pandoc(entries, csl_path): - """ - Format the entries using pandoc - - Args: - entries (dict): dictionary of entries - csl_path (str): path to formatting CSL Fle - Returns: - references (dict): dictionary of citation texts - """ - pandoc_version = tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) - citations = OrderedDict() - is_new_pandoc = pandoc_version >= (2, 11) - msg = "pandoc>=2.11" if is_new_pandoc else "pandoc<2.11" - for key, entry in entries.items(): - bibtex_string = BibliographyData(entries={entry.key: entry}).to_string("bibtex") - log.debug(f"--Converting bibtex entry {key!r} with CSL file {csl_path!r} using {msg}") - if is_new_pandoc: - citations[key] = _convert_pandoc_new(bibtex_string, csl_path) - else: - citations[key] = _convert_pandoc_legacy(bibtex_string, csl_path) - log.debug(f"--SUCCESS Converting bibtex entry 
{key!r} with CSL file {csl_path!r} using {msg}") - - return citations - - -def _convert_pandoc_new(bibtex_string, csl_path): - """ - Converts the PyBtex entry into formatted markdown citation text - using pandoc version 2.11 or newer - """ - markdown = pypandoc.convert_text( - source=bibtex_string, - to="markdown_strict", - format="bibtex", - extra_args=[ - "--citeproc", - "--csl", - csl_path, - ], - ) - - markdown = " ".join(markdown.split("\n")) - # Remove newlines from any generated span tag (non-capitalized words) - markdown = re.compile(r"<\/span>[\r\n]").sub(" ", markdown) - - citation_regex = re.compile( - r"(.+?)(?=<\/span>)<\/span>" - ) - try: - citation = citation_regex.findall(re.sub(r"(\r|\n)", "", markdown))[1] - except IndexError: - citation = markdown - return citation.strip() - - -@lru_cache(maxsize=1024) -def _convert_pandoc_citekey(bibtex_string, csl_path, fullcite): - """ - Uses pandoc to convert a markdown citation key reference - to a rendered markdown citation in the given CSL format. 
- - Limitation (atleast for harvard.csl): multiple citekeys - REQUIRE a '; ' separator to render correctly: - - [see @test; @test2] Works - - [see @test and @test2] Doesn't work - """ - with tempfile.TemporaryDirectory() as tmpdir: - bib_path = Path(tmpdir).joinpath("temp.bib") - with open(bib_path, "wt", encoding="utf-8") as bibfile: - bibfile.write(bibtex_string) - - log.debug(f"----Converting pandoc citation key {fullcite!r} with CSL file {csl_path!r} and Bibliography file" - f" '{bib_path!s}'...") - markdown = pypandoc.convert_text( - source=fullcite, - to="markdown-citations", - format="markdown", - extra_args=["--citeproc", "--csl", csl_path, "--bibliography", bib_path], - ) - log.debug(f"----SUCCESS Converting pandoc citation key {fullcite!r} with CSL file {csl_path!r} and " - f"Bibliography file '{bib_path!s}'") - - # Return only the citation text (first line(s)) - # remove any extra linebreaks to accommodate large author names - markdown = re.compile(r"[\r\n]").sub("", markdown) - return markdown.split(":::")[0].strip() - - -def _convert_pandoc_legacy(bibtex_string, csl_path): - """ - Converts the PyBtex entry into formatted markdown citation text - using pandoc version older than 2.11 - """ - with tempfile.TemporaryDirectory() as tmpdir: - bib_path = Path(tmpdir).joinpath("temp.bib") - with open(bib_path, "wt", encoding="utf-8") as bibfile: - bibfile.write(bibtex_string) - citation_text = """ ---- -nocite: '@*' ---- -""" - - markdown = pypandoc.convert_text( - source=citation_text, - to="markdown_strict", - format="md", - extra_args=["--csl", csl_path, "--bibliography", bib_path], - filters=["pandoc-citeproc"], - ) - - citation_regex = re.compile(r"[\d\.\\\s]*(.*)") - citation = citation_regex.findall(markdown.replace("\n", " "))[0] - return citation.strip() - - -def extract_cite_keys(cite_block): - """ - Extract just the keys from a citation block - """ - cite_regex = re.compile(r"@([\w\.:-]*)") - cite_keys = re.findall(cite_regex, cite_block) - - return 
cite_keys - - -def find_cite_blocks(markdown): - """ - Finds entire cite blocks in the markdown text - - Args: - markdown (str): the markdown text to be extract citation - blocks from - - regex explanation: - - first group (1): everything. (the only thing we need) - - second group (2): (?:(?:\[(-{0,1}[^@]*)) |\[(?=-{0,1}@)) - - third group (3): ((?:-{0,1}@\w*(?:; ){0,1})+) - - fourth group (4): (?:[^\]\n]{0,1} {0,1})([^\]\n]*) - - The first group captures the entire cite block, as is - The second group captures the prefix, which is everything between '[' and ' @| -@' - The third group captures the citekey(s), ';' separated (affixes NOT supported) - The fourth group captures anything after the citekeys, excluding the leading whitespace - (The non-capturing group removes any symbols or whitespaces between the citekey and suffix) - - Matches for [see @author; @doe my suffix here] - [0] entire block: '[see @author; @doe my suffix here]' - [1] prefix: 'see' - [2] citekeys: '@author; @doe' - [3] suffix: 'my suffix here' - - Does NOT match: [mail@example.com] - DOES match [mail @example.com] as [mail][@example][com] - """ - r = r"((?:(?:\[(-{0,1}[^@]*)) |\[(?=-{0,1}@))((?:-{0,1}@\w*(?:; ){0,1})+)(?:[^\]\n]{0,1} {0,1})([^\]\n]*)\])" - cite_regex = re.compile(r) - - citation_blocks = [ - # We only care about the block (group 1) - (matches.group(1)) - for matches in re.finditer(cite_regex, markdown) - ] - - return citation_blocks - - -def insert_citation_keys(citation_quads, markdown, csl=False, bib=False): - """ - Insert citations into the markdown text replacing - the old citation keys - - Args: - citation_quads (tuple): a quad tuple of all citation info - markdown (str): the markdown text to modify - - Returns: - markdown (str): the modified Markdown - """ - - log.debug("Replacing citation keys with the generated ones...") - - # Renumber quads if using numbers for citation links - - grouped_quads = [list(g) for _, g in groupby(citation_quads, key=lambda x: x[0])] - for 
quad_group in grouped_quads: - full_citation = quad_group[0][0] # the full citation block - replacement_citaton = "".join(["[^{}]".format(quad[2]) for quad in quad_group]) - - # if cite_inline is true, convert full_citation with pandoc and add to replacement_citaton - if csl and bib: - log.debug(f"--Rendering citation inline for {full_citation!r}...") - # Verify that the pandoc installation is newer than 2.11 - pandoc_version = pypandoc.get_pandoc_version() - pandoc_version_tuple = tuple(int(ver) for ver in pandoc_version.split(".")) - if pandoc_version_tuple <= (2, 11): - raise RuntimeError( - f"Your version of pandoc (v{pandoc_version}) is " - "incompatible with the cite_inline feature." - ) - - inline_citation = _convert_pandoc_citekey(bib, csl, full_citation) - replacement_citaton = f" {inline_citation}{replacement_citaton}" - - # Make sure inline citations doesn't get an extra whitespace by - # replacing it with whitespace added first - markdown = markdown.replace(f" {full_citation}", replacement_citaton) - log.debug(f"--SUCCESS Rendering citation inline for {full_citation!r}") - - markdown = markdown.replace(full_citation, replacement_citaton) - - log.debug("SUCCESS Replacing citation keys with the generated ones") - - return markdown - - -def format_bibliography(citation_quads): - """ - Generates a bibliography from the citation quads - - Args: - citation_quads (tuple): a quad tuple of all citation info - - Returns: - markdown (str): the Markdown string for the bibliography - """ - new_bib = {quad[2]: quad[3] for quad in citation_quads} - bibliography = [] - for key, citation in new_bib.items(): - bibliography_text = "[^{}]: {}".format(key, citation) - bibliography.append(bibliography_text) - - return "\n".join(bibliography) - -def tempfile_from_url(name, url, suffix): +def tempfile_from_url(name: str, url: str, suffix: str) -> str: + """Download bibfile from a URL.""" log.debug(f"Downloading {name} from URL {url} to temporary file...") if 
urllib.parse.urlparse(url).hostname == "api.zotero.org": return tempfile_from_zotero_url(name, url, suffix) @@ -297,9 +17,7 @@ def tempfile_from_url(name, url, suffix): try: dl = requests.get(url) if dl.status_code != 200: # pragma: no cover - raise RuntimeError( - f"Couldn't download the url: {url}.\n Status Code: {dl.status_code}" - ) + raise RuntimeError(f"Couldn't download the url: {url}.\n Status Code: {dl.status_code}") file = tempfile.NamedTemporaryFile(mode="wt", encoding="utf-8", suffix=suffix, delete=False) file.write(dl.text) @@ -309,9 +27,7 @@ def tempfile_from_url(name, url, suffix): except requests.exceptions.RequestException: # pragma: no cover pass - raise RuntimeError( - f"Couldn't successfully download the url: {url}" - ) # pragma: no cover + raise RuntimeError(f"Couldn't successfully download the url: {url}") # pragma: no cover def tempfile_from_zotero_url(name: str, url: str, suffix: str) -> str: diff --git a/test_files/test_citation.py b/test_files/test_citation.py new file mode 100644 index 0000000..9fc9f03 --- /dev/null +++ b/test_files/test_citation.py @@ -0,0 +1,137 @@ +""" +This test file tests the citation module and ensures it is compatible with +pybtex basic citations and pandoc citation formattting +""" + +from mkdocs_bibtex.citation import Citation, CitationBlock + + +def test_basic_citation(): + """Test basic citation extraction""" + citations = Citation.from_markdown("@test") + assert len(citations) == 1 + assert citations[0].key == "test" + assert citations[0].prefix == "" + assert citations[0].suffix == "" + + +def test_citation_with_prefix(): + """Test citation with prefix""" + citations = Citation.from_markdown("see @test") + assert len(citations) == 1 + assert citations[0].key == "test" + assert citations[0].prefix == "see" + assert citations[0].suffix == "" + + +def test_citation_with_suffix(): + """Test citation with suffix""" + citations = Citation.from_markdown("@test, p. 
123") + assert len(citations) == 1 + assert citations[0].key == "test" + assert citations[0].prefix == "" + assert citations[0].suffix == "p. 123" + + +def test_citation_with_prefix_and_suffix(): + """Test citation with both prefix and suffix""" + citations = Citation.from_markdown("see @test, p. 123") + assert len(citations) == 1 + assert citations[0].key == "test" + assert citations[0].prefix == "see" + assert citations[0].suffix == "p. 123" + + +def test_suppressed_author(): + """Test suppressed author citation""" + citations = Citation.from_markdown("-@test") + assert len(citations) == 1 + assert citations[0].key == "test" + assert citations[0].prefix == "-" + assert citations[0].suffix == "" + + +def test_multiple_citations(): + """Test multiple citations separated by semicolon""" + citations = Citation.from_markdown("@test; @test2") + assert len(citations) == 2 + assert citations[0].key == "test" + assert citations[1].key == "test2" + + +def test_complex_multiple_citations(): + """Test multiple citations with prefixes and suffixes""" + citations = Citation.from_markdown("see @test, p. 123; @test2, p. 456") + assert len(citations) == 2 + assert citations[0].key == "test" + assert citations[0].prefix == "see" + assert citations[0].suffix == "p. 123" + assert citations[1].key == "test2" + assert citations[1].prefix == "" + assert citations[1].suffix == "p. 456" + + +def test_citation_block(): + """Test citation block extraction""" + blocks = CitationBlock.from_markdown("[see @test, p. 123]") + assert len(blocks) == 1 + assert len(blocks[0].citations) == 1 + assert blocks[0].citations[0].key == "test" + assert blocks[0].citations[0].prefix == "see" + assert blocks[0].citations[0].suffix == "p. 123" + assert str(blocks[0]) == "[see @test, p. 123]" + + +def test_multiple_citation_blocks(): + """Test multiple citation blocks""" + blocks = CitationBlock.from_markdown("[see @test, p. 
123] Some text [@test2]") + assert len(blocks) == 2 + assert blocks[0].citations[0].key == "test" + assert blocks[1].citations[0].key == "test2" + assert str(blocks[0]) == "[see @test, p. 123]" + assert str(blocks[1]) == "[@test2]" + + +def test_invalid_citation(): + """Test invalid citation formats""" + citations = Citation.from_markdown("not a citation") + assert len(citations) == 0 + + +def test_email_exclusion(): + """Test that email addresses are not parsed as citations""" + citations = Citation.from_markdown("user@example.com") + assert len(citations) == 0 + + +def test_complex_citation_block(): + """Test complex citation block with multiple citations""" + blocks = CitationBlock.from_markdown("[see @test1, p. 123; @test2, p. 456; -@test3]") + assert len(blocks) == 1 + assert len(blocks[0].citations) == 3 + + assert blocks[0].citations[0].key == "test1" + assert blocks[0].citations[0].prefix == "see" + assert blocks[0].citations[0].suffix == "p. 123" + + assert blocks[0].citations[1].key == "test2" + assert blocks[0].citations[1].prefix == "" + assert blocks[0].citations[1].suffix == "p. 456" + + assert blocks[0].citations[2].key == "test3" + assert blocks[0].citations[2].prefix == " -" + assert blocks[0].citations[2].suffix == "" + assert str(blocks[0]) == "[see @test1, p. 123; @test2, p. 456; -@test3]" + + +def test_citation_string(): + """Test citation string""" + citation = Citation("test", "Author", "2020") + assert str(citation) == "Author @test 2020" + + block = CitationBlock([citation]) + assert str(block) == "[Author @test 2020]" + + citations = [citation, citation] + block = CitationBlock(citations) + assert str(block) == "[Author @test 2020; Author @test 2020]" diff --git a/test_files/test_features.py b/test_files/test_features.py deleted file mode 100644 index a3278a3..0000000 --- a/test_files/test_features.py +++ /dev/null @@ -1,350 +0,0 @@ -""" -This test file checks to make sure each feature works rather than checking each -function. 
Each feature should have a single test function that covers all the python -functions it that would need to be tested -""" -import os - -import pytest -import pypandoc - -from mkdocs_bibtex.plugin import BibTexPlugin - -from mkdocs_bibtex.utils import ( - find_cite_blocks, - format_bibliography, - insert_citation_keys, -) - -module_dir = os.path.dirname(os.path.abspath(__file__)) -test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) - - -@pytest.fixture -def plugin(): - """ - Basic BibTex Plugin without CSL - """ - plugin = BibTexPlugin() - plugin.load_config( - options={"bib_file": os.path.join(test_files_dir, "test.bib")}, - config_file_path=test_files_dir, - ) - plugin.on_config(plugin.config) - plugin.csl_file = None - return plugin - - - -@pytest.fixture -def plugin_advanced_pandoc(plugin): - """ - Enables advanced features via pandoc - """ - # Only valid for Pandoc > 2.11 - pandoc_version = pypandoc.get_pandoc_version() - pandoc_version_tuple = tuple(int(ver) for ver in pandoc_version.split(".")) - if pandoc_version_tuple <= (2, 11): - pytest.skip(f"Unsupported version of pandoc (v{pandoc_version}) installed.") - - plugin.config["bib_file"] = os.path.join(test_files_dir, "test.bib") - plugin.config["csl_file"] = os.path.join( - test_files_dir, "springer-basic-author-date.csl" - ) - plugin.config["cite_inline"] = True - - delattr(plugin,"last_configured") - plugin.on_config(plugin.config) - - return plugin - - -def test_basic_citations(plugin): - """ - Tests super basic citations using the built-in citation style - """ - assert find_cite_blocks("[@test]") == ["[@test]"] - - assert ( - insert_citation_keys( - [ - ( - "[@test]", - "@test", - "1", - "First Author and Second Author", - ) - ], - "[@test]", - ) - == "[^1]" - ) - - ### TODO: test format_bibliography - - assert ( - "[@test]", - "test", - "1", - "First Author and Second Author. Test title. 
*Testing Journal*, 2019.", - ) == plugin.format_citations(["[@test]"])[0] - - assert ( - "[@test2]", - "test2", - "1", - "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019.", - ) == plugin.format_citations(["[@test2]"])[0] - - # test long citation - assert ( - "[@Bivort2016]", - "Bivort2016", - "1", - "Benjamin L. De Bivort and Bruno Van Swinderen. Evidence for selective attention in the insect brain. *Current Opinion in Insect Science*, 15:1–7, 2016. [doi:10.1016/j.cois.2016.02.007](https://doi.org/10.1016/j.cois.2016.02.007).", # noqa: E501 - ) == plugin.format_citations(["[@Bivort2016]"])[0] - - # Test \url embedding - assert ( - "[@test_citavi]", - "test_citavi", - "1", - "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019. URL: [\\\\url\\{https://doi.org/10.21577/0103\\-5053.20190253\\}](\\url{https://doi.org/10.21577/0103-5053.20190253}).", # noqa: E501 - ) == plugin.format_citations(["[@test_citavi]"])[0] - - -def test_compound_citations(plugin): - """ - Compound citations are citations that include multiple cite keys - """ - assert find_cite_blocks("[@test; @test2]") == ["[@test; @test2]"] - assert find_cite_blocks("[@test]\n [@test; @test2]") == [ - "[@test]", - "[@test; @test2]", - ] - - assert ( - insert_citation_keys( - [ - ( - "[@test; @test2]", - "@test", - "1", - "First Author and Second Author", - ), - ( - "[@test; @test2]", - "@test2", - "2", - "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019", # noqa: E501 - ), - ], - "[@test; @test2]", - ) - == "[^1][^2]" - ) - - quads = [ - ( - "[@test; @test2]", - "@test", - "1", - "First Author and Second Author", - ), - ( - "[@test; @test2]", - "@test2", - "2", - "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019", - ), - ] - - bib = format_bibliography(quads) - - assert "[^1]: First Author and Second Author" in bib - assert ( - "[^2]: First Author and Second Author. Test Title (TT). 
*Testing Journal (TJ)*, 2019" - in bib - ) - - assert [ - ( - "[@test; @test2]", - "test", - "1", - "First Author and Second Author. Test title. *Testing Journal*, 2019.", - ), - ( - "[@test; @test2]", - "test2", - "2", - "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019.", - ), - ] == plugin.format_citations(["[@test; @test2]"]) - - -############### -# PANDOC ONLY # -############### - - -def test_basic_pandoc(plugin): - plugin.csl_file = os.path.join(test_files_dir, "nature.csl") - assert ( - "[@test]", - "test", - "1", - "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019).", - ) == plugin.format_citations(["[@test]"])[0] - - assert ( - "[@Bivort2016]", - "Bivort2016", - "1", - "De Bivort, B. L. & Van Swinderen, B. Evidence for selective attention in the insect brain. *Current Opinion in Insect Science* **15**, 1–7 (2016).", # noqa: E501 - ) == plugin.format_citations(["[@Bivort2016]"])[0] - - # Test a CSL that outputs references in a different style - plugin.csl_file = os.path.join(test_files_dir, "springer-basic-author-date.csl") - assert ( - "[@test]", - "test", - "1", - "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019).", - ) == plugin.format_citations(["[@test]"])[0] - - assert ( - "[@test_citavi]", - "test_citavi", - "1", - "Author F, Author S (2019) Test Title (TT). 
Testing Journal (TJ) 1:", - ) == plugin.format_citations(["[@test_citavi]"])[0] - - -def test_inline_ciations(plugin_advanced_pandoc): - plugin = plugin_advanced_pandoc - - # Ensure inline citation works - quads = [("[@test]", None, "1", None)] - test_markdown = "Hello[@test]" - result = "Hello (Author and Author 2019)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - -def test_supressed_authors(plugin_advanced_pandoc): - plugin = plugin_advanced_pandoc - - # Ensure suppressed authors works - quads = [("[-@test]", None, "1", None)] - test_markdown = "Suppressed [-@test]" - result = "Suppressed (2019)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - -def test_affixes(plugin_advanced_pandoc): - plugin = plugin_advanced_pandoc - # Ensure affixes work - quads = [("[see @test]", None, "1", None)] - test_markdown = "Hello[see @test]" - result = "Hello (see Author and Author 2019)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - quads = [("[@test, p. 123]", None, "1", None)] - test_markdown = "[@test, p. 123]" - result = " (Author and Author 2019, p. 123)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - # Combined - quads = [("[see @test, p. 123]", None, "1", None)] - test_markdown = "Hello[see @test, p. 123]" - result = "Hello (see Author and Author 2019, p. 123)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - # Combined, suppressed author - quads = [("[see -@test, p. 123]", None, "1", None)] - test_markdown = "Suppressed [see -@test, p. 123]" - result = "Suppressed (see 2019, p. 
123)[^1]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - -def test_invalid_blocks(plugin_advanced_pandoc): - pass - - -def test_citavi_format(plugin_advanced_pandoc): - pass - - -def test_duplicate_reference(plugin_advanced_pandoc): - """ - Ensures duplicats references show up appropriately - # TODO: These test cases don't seem right - """ - plugin = plugin_advanced_pandoc - # Ensure multi references work - quads = [ - ("[@test; @Bivort2016]", None, "1", None), - ("[@test; @Bivort2016]", None, "2", None), - ] - test_markdown = "[@test; @Bivort2016]" - # CSL defines the order, this ordering is therefore expected with springer.csl - result = " (De Bivort and Van Swinderen 2016; Author and Author 2019)[^1][^2]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - quads = [ - ("[@test, p. 12; @Bivort2016, p. 15]", None, "1", None), - ("[@test, p. 12; @Bivort2016, p. 15]", None, "2", None), - ] - test_markdown = "[@test, p. 12; @Bivort2016, p. 15]" - # CSL defines the order, this ordering is therefore expected with springer.csl - result = " (De Bivort and Van Swinderen 2016, p. 15; Author and Author 2019, p. 12)[^1][^2]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - -def test_multi_reference(plugin_advanced_pandoc): - """ - Ensures multiple inline references show up appropriately - """ - - plugin = plugin_advanced_pandoc - # Ensure multiple inline references works - quads = [ - ("[@test]", None, "1", None), - ("[see @Bivort2016, p. 123]", None, "2", None), - ] - test_markdown = "Hello[@test] World [see @Bivort2016, p. 123]" - result = "Hello (Author and Author 2019)[^1] World (see De Bivort and Van Swinderen 2016, p. 
123)[^2]" - assert result == insert_citation_keys( - quads, test_markdown, plugin.csl_file, plugin.bib_data.to_string("bibtex") - ) - - -def test_custom_footnote_formatting(plugin): - - assert plugin.format_footnote_key(1) == "1" - plugin.footnote_format = "Test Format {number}" - assert plugin.format_footnote_key(1) == "Test Format 1" - - plugin.csl_file = os.path.join(test_files_dir, "nature.csl") - assert ( - "[@test]", - "test", - "Test Format 1", - "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019).", - ) == plugin.format_citations(["[@test]"])[0] diff --git a/test_files/test_integration.py b/test_files/test_integration.py new file mode 100644 index 0000000..e46cb60 --- /dev/null +++ b/test_files/test_integration.py @@ -0,0 +1,189 @@ +""" +Integration tests for mkdocs-bibtex plugin. These tests verify the complete functionality +of the plugin rather than testing individual components. +""" + +import os +import pytest +import pypandoc +from mkdocs_bibtex.plugin import BibTexPlugin + +module_dir = os.path.dirname(os.path.abspath(__file__)) +test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) + + +@pytest.fixture +def plugin(): + """Basic BibTex Plugin without CSL""" + plugin = BibTexPlugin() + plugin.load_config( + options={"bib_file": os.path.join(test_files_dir, "test.bib"), "bib_by_default": False}, + config_file_path=test_files_dir, + ) + plugin.on_config(plugin.config) + + return plugin + + +@pytest.fixture +def pandoc_plugin(plugin): + """BibTex Plugin with Pandoc and CSL support""" + # Skip if Pandoc version is too old + pandoc_version = pypandoc.get_pandoc_version() + if tuple(int(v) for v in pandoc_version.split(".")) <= (2, 11): + pytest.skip(f"Unsupported pandoc version (v{pandoc_version})") + + plugin = BibTexPlugin() + plugin.load_config( + options={ + "bib_file": os.path.join(test_files_dir, "test.bib"), + "csl_file": os.path.join(test_files_dir, "springer-basic-author-date.csl"), + "cite_inline": True, 
+ "bib_by_default": False, + }, + config_file_path=test_files_dir, + ) + plugin.on_config(plugin.config) + # plugin.csl_file = None + + return plugin + + +def test_basic_citation_rendering(plugin): + """Test basic citation functionality without CSL""" + markdown = "Here is a citation [@test] and another one [@test2].\n\n\\bibliography" + result = plugin.on_page_markdown(markdown, None, None, None) + + # Check citation replacements + assert "[^test]" in result + assert "[^test2]" in result + + # Check bibliography entries + assert "First Author and Second Author. Test title. *Testing Journal*, 2019." in result + assert "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019." in result + + +def test_pandoc_citation_rendering(pandoc_plugin): + """Test citation rendering with Pandoc and CSL""" + markdown = "Here is a citation [@test] and another [@Bivort2016].\n\n\\bibliography" + result = pandoc_plugin.on_page_markdown(markdown, None, None, None) + + # Check inline citations + assert "(Author and Author 2019)" in result + assert "(De Bivort and Van Swinderen 2016)" in result + + # Check bibliography formatting + assert "Author F, Author S (2019)" in result + assert "De Bivort BL, Van Swinderen B (2016)" in result + + +def test_citation_features(pandoc_plugin): + """Test various citation features like prefixes, suffixes, and author suppression""" + markdown = """ +See [-@test] for more. +As shown by [see @test, p. 123]. +Multiple sources [@test; @test2]. + +\\bibliography + """ + result = pandoc_plugin.on_page_markdown(markdown, None, None, None) + + # Check various citation formats + assert "(2019" in result # Suppressed author + assert "see Author and Author 2019a, p. 123" in result # Prefix and suffix + assert "Author and Author 2019a, b" in result # Multiple citations + + # Check bibliography formatting + assert "Author F, Author S (2019a) Test title. Testing Journal 1:" in result + assert "Author F, Author S (2019b) Test Title (TT). 
Testing Journal (TJ) 1:" in result + + # Check that the bibliography entries are only shown once + assert result.count("Author F, Author S (2019a) Test title. Testing Journal 1:") == 1 + assert result.count("Author F, Author S (2019b) Test Title (TT). Testing Journal (TJ) 1:") == 1 + + +def test_bibliography_controls(plugin): + """Test bibliography inclusion behavior""" + # Test with explicit bibliography command + markdown = "Citation [@test]\n\n\\bibliography" + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^test]:" in result + + # Test without bibliography command when bib_by_default is False + markdown = "Citation [@test]" + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^test]:" not in result + + # Test without bibliography command when bib_by_default is True + plugin.config.bib_by_default = True + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^test]:" in result + + +def test_custom_footnote_format(): + """Test custom footnote formatting""" + plugin = BibTexPlugin() + plugin.load_config( + options={ + "bib_file": os.path.join(test_files_dir, "test.bib"), + "bib_by_default": False, + "footnote_format": "ref-{key}", + }, + config_file_path=test_files_dir, + ) + plugin.on_config(plugin.config) + + markdown = "Citation [@test]\n\n\\bibliography" + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^ref-test]" in result + # Test that an invalid footnote format raises an exception + bad_plugin = BibTexPlugin() + bad_plugin.load_config( + options={"footnote_format": ""}, + config_file_path=test_files_dir, + ) + with pytest.raises(Exception): + bad_plugin.on_config(bad_plugin.config) + + +def test_invalid_citations(plugin): + """Test handling of invalid citations""" + markdown = "Invalid citation [@nonexistent]\n\n\\bibliography" + result = plugin.on_page_markdown(markdown, None, None, None) + # assert "[@nonexistent]" in result # Invalid citation should remain unchanged 
+ assert "[^nonexistent]" not in result + + +def test_full_bib_command(plugin): + """Test full bibliography command""" + markdown = "Full bibliography [@test]\n\n\\full_bibliography" + result = plugin.on_page_markdown(markdown, None, None, None) + + assert "Full bibliography [^test]" in result + assert "[^test]:" in result + assert "[^test2]:" in result + assert "[^Bivort2016]:" in result + assert "[^test_citavi]:" in result + + +def test_bib_by_default(plugin): + """Test bib_by_default behavior""" + markdown = "Citation [@test]" + plugin.config.bib_by_default = False + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^test]:" not in result + + plugin.config.bib_by_default = True + result = plugin.on_page_markdown(markdown, None, None, None) + assert "[^test]:" in result + + +def test_full_bib_command_with_pandoc(pandoc_plugin): + """Test full bibliography command with Pandoc""" + markdown = "Full bibliography\n\n\\full_bibliography" + result = pandoc_plugin.on_page_markdown(markdown, None, None, None) + + assert "[^test]: Author F, Author S (2019a)" in result + assert "[^test2]: Author F, Author S (2019b)" in result + assert "[^Bivort2016]: De Bivort BL, Van Swinderen B (2016)" in result + assert "[^test_citavi]: Author F, Author S (2019c)" in result diff --git a/test_files/test_pandoc_registry.py b/test_files/test_pandoc_registry.py new file mode 100644 index 0000000..5990dce --- /dev/null +++ b/test_files/test_pandoc_registry.py @@ -0,0 +1,200 @@ +import os +import pytest +import pypandoc +from mkdocs_bibtex.registry import PandocRegistry +from mkdocs_bibtex.citation import Citation, CitationBlock + +module_dir = os.path.dirname(os.path.abspath(__file__)) +test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) + + +@pytest.fixture +def bib_file(): + return os.path.join(test_files_dir, "test.bib") + + +@pytest.fixture +def csl(): + """Provide the Springer CSL file for testing""" + return os.path.join(test_files_dir,
"springer-basic-author-date.csl") + + +@pytest.fixture +def numeric_csl(): + """Provide the Nature CSL file for testing""" + return os.path.join(test_files_dir, "nature.csl") + + +@pytest.fixture +def registry(bib_file, csl): + """Create a registry with Springer style for testing""" + return PandocRegistry([bib_file], csl) + + +@pytest.fixture +def numeric_registry(bib_file, numeric_csl): + """Create a registry with Nature style for testing""" + return PandocRegistry([bib_file], numeric_csl) + + +def test_bad_pandoc_registry(bib_file): + """Throw error if no CSL file is provided""" + with pytest.raises(Exception): + PandocRegistry([bib_file]) + + +def test_pandoc_registry_initialization(registry, csl): + """Test basic initialization and loading of bib files""" + assert len(registry.bib_data.entries) == 4 + assert registry.csl_file == csl + + +def test_multiple_bib_files(csl): + """Test loading multiple bibliography files""" + bib1 = os.path.join(test_files_dir, "multi_bib", "bib1.bib") + bib2 = os.path.join(test_files_dir, "multi_bib", "multi_bib_child_dir", "bib2.bib") + + registry = PandocRegistry([bib1, bib2], csl) + assert "test1" in registry.bib_data.entries + assert "test2" in registry.bib_data.entries + + # Test citations from both files work + citation1 = Citation("test1", "", "") + citation2 = Citation("test2", "", "") + registry.validate_citation_blocks([CitationBlock([citation1, citation2])]) + text1 = registry.reference_text(citation1) + text2 = registry.reference_text(citation2) + assert "Test title 1" in text1 + assert "Test title 2" in text2 + + +def test_validate_citation_blocks_valid(registry): + """Test validation of valid citation blocks""" + # Single citation + citations = [Citation("test", "", "")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + + # Multiple citations + citations = [Citation("test", "", ""), Citation("test2", "", "")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block])
+ + +@pytest.mark.xfail(reason="For some reason pytest does not catch the warning") +def test_validate_citation_blocks_invalid(registry): + """Test validation fails with invalid citation key""" + citations = [Citation("nonexistent", "", "")] + block = CitationBlock(citations) + with pytest.warns(UserWarning, match="Citing unknown reference key nonexistent"): + registry.validate_citation_blocks([block]) + + +def test_inline_text_basic(registry): + """Test basic inline citation formatting with different styles""" + citations = [Citation("test", "", "")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + text = registry.inline_text(block) + assert text # Basic check that we got some text back + assert "Author" in text # Should contain author name + + +def test_inline_text_multiple(registry): + """Test inline citation with multiple references""" + citations = [Citation("test", "", ""), Citation("test2", "", "")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + text = registry.inline_text(block) + assert text + assert "Author" in text + + +# Use springer style for consistent prefix/suffix tests +def test_inline_text_with_prefix(registry): + """Test inline citation with prefix""" + citations = [Citation("test", "see", "")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + text = registry.inline_text(block) + assert text + assert "see" in text.lower() + + +def test_inline_text_with_suffix(registry): + """Test inline citation with suffix""" + citations = [Citation("test", "", "p. 
123")] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + text = registry.inline_text(block) + assert text + assert "123" in text + + +def test_reference_text(registry): + """Test basic reference text formatting""" + citation = Citation("test", "", "") + block = CitationBlock([citation]) + registry.validate_citation_blocks([block]) + text = registry.reference_text(citation) + # Update assertion to match Springer style + assert "Author" in text and "Test title" in text + + +def test_pandoc_formatting(registry): + """Test formatting with newer Pandoc versions""" + citation = Citation("test", "", "") + block = CitationBlock([citation]) + registry.validate_citation_blocks([block]) + text = registry.reference_text(citation) + assert text == "Author F, Author S (2019) Test title. Testing Journal 1:" + + +def test_multiple_citation_blocks(registry): + """Test multiple citation blocks""" + citations1 = [Citation("test", "", ""), Citation("test2", "", "")] + block1 = CitationBlock(citations1) + + citations2 = [Citation("Bivort2016", "", "")] + block2 = CitationBlock(citations2) + citation_blocks = [block1, block2] + registry.validate_citation_blocks(citation_blocks) + + text = registry.inline_text(block1) + assert text + assert "Author" in text + + # Test individual citations from block1 + text1 = registry.reference_text(citations1[0]) + text2 = registry.reference_text(citations1[1]) + assert text1 + assert text2 + assert "Author" in text1 + assert "Author" in text2 + + text = registry.inline_text(block2) + assert text + assert "Bivort" in text + + +@pytest.mark.skipif( + int(pypandoc.get_pandoc_version().split(".")[0]) < 3, reason="Pandoc formatting is different in Pandoc 3.0" +) +def test_complex_citation_formatting(registry): + """Test complex citation scenarios""" + citations = [ + Citation("test", "see", "p. 
123-125"), + Citation("test2", "compare", "chapter 2"), + Citation("Bivort2016", "also", "figure 3"), + ] + block = CitationBlock(citations) + registry.validate_citation_blocks([block]) + text = registry.inline_text(block) + + # Check that prefix, suffix, and multiple citations are formatted correctly + assert "see" in text.lower() + assert "123--125" in text + assert "compare" in text.lower() + assert "chap. 2" in text + assert "also" in text.lower() + assert "fig. 3" in text diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 60d1494..c428e0f 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -1,42 +1,11 @@ -import collections.abc import os -import random -import string import pytest -import responses from mkdocs_bibtex.plugin import BibTexPlugin module_dir = os.path.dirname(os.path.abspath(__file__)) test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) -MOCK_ZOTERO_URL = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex" - - -@pytest.fixture -def mock_zotero_api(request: pytest.FixtureRequest) -> collections.abc.Generator[responses.RequestsMock]: - zotero_api_url = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex&limit=100" - bibtex_contents = generate_bibtex_entries(request.param) - - limit = 100 - pages = [bibtex_contents[i : i + limit] for i in range(0, len(bibtex_contents), limit)] - - with responses.RequestsMock() as mock_api: - for page_num, page in enumerate(pages): - current_start = "" if page_num == 0 else f"&start={page_num * limit}" - next_start = f"&start={(page_num + 1) * limit}" - mock_api.add( - responses.Response( - method="GET", - url=f"{zotero_api_url}{current_start}", - json="\n".join(page), - headers={} - if page_num == len(pages) - 1 - else {"Link": f"<{zotero_api_url}{next_start}>; rel='next'"}, - ) - ) - - yield mock_api @pytest.fixture @@ -52,20 +21,18 @@ def plugin(): def test_bibtex_loading_bibfile(plugin): - assert 
len(plugin.bib_data.entries) == 4 + assert len(plugin.registry.bib_data.entries) == 4 def test_bibtex_loading_bib_url(): plugin = BibTexPlugin() plugin.load_config( - options={ - "bib_file": "https://raw.githubusercontent.com/shyamd/mkdocs-bibtex/main/test_files/test.bib" - }, + options={"bib_file": "https://raw.githubusercontent.com/shyamd/mkdocs-bibtex/main/test_files/test.bib"}, config_file_path=test_files_dir, ) plugin.on_config(plugin.config) - assert len(plugin.bib_data.entries) == 4 + assert len(plugin.registry.bib_data.entries) == 4 def test_bibtex_loading_bibdir(): @@ -76,97 +43,4 @@ def test_bibtex_loading_bibdir(): ) plugin.on_config(plugin.config) - assert len(plugin.bib_data.entries) == 2 - - -@pytest.mark.parametrize(("mock_zotero_api", "number_of_entries"), ((4, 4), (150, 150)), indirect=["mock_zotero_api"]) -def test_bibtex_loading_zotero(mock_zotero_api: responses.RequestsMock, number_of_entries: int) -> None: - plugin = BibTexPlugin() - plugin.load_config( - options={"bib_file": MOCK_ZOTERO_URL}, - config_file_path=test_files_dir, - ) - - plugin.on_config(plugin.config) - assert len(plugin.bib_data.entries) == number_of_entries - -def test_on_page_markdown(plugin): - """ - This function just tests to make sure the rendered markdown changees with - options and basic functionality works. It doesn't test "features" - """ - # run test with bib_by_default set to False - plugin.config["bib_by_default"] = False - - test_markdown = "This is a citation. [@test]\n\n \\bibliography" - - assert ( - "[^1]: First Author and Second Author. Test title. *Testing Journal*, 2019." - in plugin.on_page_markdown(test_markdown, None, None, None) - ) - - # ensure there are two items in bibliography - test_markdown = "This is a citation. 
[@test2] This is another citation [@test]\n\n \\bibliography" - - assert "[^2]:" in plugin.on_page_markdown(test_markdown, None, None, None) - - # ensure bib_by_default is working - plugin.config["bib_by_default"] = True - test_markdown = "This is a citation. [@test]" - - assert "[^1]:" in plugin.on_page_markdown(test_markdown, None, None, None) - plugin.config["bib_by_default"] = False - - # ensure nonexistant citekeys are removed correctly (not replaced) - test_markdown = "A non-existant citekey. [@i_do_not_exist]" - - assert "[@i_do_not_exist]" in plugin.on_page_markdown( - test_markdown, None, None, None - ) - - # Ensure if an item is referenced multiple times, it only shows up as one reference - test_markdown = "This is a citation. [@test] This is another citation [@test]\n\n \\bibliography" - - assert "[^2]" not in plugin.on_page_markdown(test_markdown, None, None, None) - - # Ensure item only shows up once even if used in multiple places as both a compound and lone cite key - test_markdown = "This is a citation. 
[@test; @test2] This is another citation [@test]\n\n \\bibliography" - - assert "[^3]" not in plugin.on_page_markdown(test_markdown, None, None, None) - - -def test_footnote_formatting_config(plugin): - """ - This function tests to ensure footnote formatting configuration is working properly - """ - # Test to make sure the config enforces {number} in the format - bad_plugin = BibTexPlugin() - bad_plugin.load_config( - options={"footnote_format": ""}, - config_file_path=test_files_dir, - ) - - with pytest.raises(Exception): - bad_plugin.on_config(bad_plugin.config) - -def generate_bibtex_entries(n: int) -> list[str]: - """Generates n random bibtex entries.""" - - entries = [] - - for i in range(n): - author_first = "".join(random.choices(string.ascii_letters, k=8)) - author_last = "".join(random.choices(string.ascii_letters, k=8)) - title = "".join(random.choices(string.ascii_letters, k=10)) - journal = "".join(random.choices(string.ascii_uppercase, k=5)) - year = str(random.randint(1950, 2025)) - - entries.append(f""" -@article{{{author_last}_{i}}}, - title = {{{title}}}, - volume = {{1}}, - journal = {{{journal}}}, - author = {{{author_last}, {author_first}}}, - year = {{{year}}}, -""") - return entries + assert len(plugin.registry.bib_data.entries) == 2 diff --git a/test_files/test_simple_registry.py b/test_files/test_simple_registry.py new file mode 100644 index 0000000..7c02e08 --- /dev/null +++ b/test_files/test_simple_registry.py @@ -0,0 +1,98 @@ +import os +import pytest +from mkdocs_bibtex.registry import SimpleRegistry +from mkdocs_bibtex.citation import Citation, CitationBlock + +module_dir = os.path.dirname(os.path.abspath(__file__)) +test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) + + +@pytest.fixture +def simple_registry(): + bib_file = os.path.join(test_files_dir, "test.bib") + return SimpleRegistry([bib_file]) + + +def test_simple_registry_initialization(simple_registry): + """Test basic initialization and loading of 
bib files""" + assert len(simple_registry.bib_data.entries) == 4 + + +def test_validate_citation_blocks_valid(simple_registry): + """Test validation of valid citation blocks""" + # Single citation + citations = [Citation("test", "", "")] + block = CitationBlock(citations) + simple_registry.validate_citation_blocks([block]) + + # Multiple citations + citations = [Citation("test", "", ""), Citation("test2", "", "")] + block = CitationBlock(citations) + simple_registry.validate_citation_blocks([block]) + + +@pytest.mark.xfail(reason="For some reason pytest does not catch the warning") +def test_validate_citation_blocks_invalid_key(simple_registry): + """Test validation fails with invalid citation key""" + citations = [Citation("nonexistent", "", "")] + block = CitationBlock(citations) + with pytest.warns(UserWarning, match="Citing unknown reference key nonexistent"): + simple_registry.validate_citation_blocks([block]) + + +@pytest.mark.xfail(reason="For some reason pytest does not catch the warning") +def test_validate_citation_blocks_invalid_affixes(simple_registry): + """Test validation fails with affixes (not supported in simple mode)""" + # Test prefix + citations = [Citation("test", "see", "")] + block = CitationBlock(citations) + with pytest.warns(UserWarning, match="Simple style does not support any affixes"): + simple_registry.validate_citation_blocks([block]) + + # Test suffix + citations = [Citation("test", "", "p. 
123")] + block = CitationBlock(citations) + with pytest.warns(UserWarning, match="Simple style does not support any affixes"): + simple_registry.validate_citation_blocks([block]) + + +def test_inline_text(simple_registry): + """Test inline citation text generation""" + # Single citation + citations = [Citation("test", "", "")] + block = CitationBlock(citations) + assert simple_registry.inline_text(block) == "[^test]" + + # Multiple citations + citations = [Citation("test", "", ""), Citation("test2", "", "")] + block = CitationBlock(citations) + assert simple_registry.inline_text(block) == "[^test][^test2]" + + +def test_reference_text(simple_registry): + """Test reference text generation""" + # Test basic citation + citation = Citation("test", "", "") + assert ( + simple_registry.reference_text(citation) + == "First Author and Second Author. Test title. *Testing Journal*, 2019." + ) + + # Test another basic citation + citation = Citation("test2", "", "") + assert ( + simple_registry.reference_text(citation) + == "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019." + ) + + # test long citation + citation = Citation("Bivort2016", "", "") + assert ( + simple_registry.reference_text(citation) + == "Benjamin L. De Bivort and Bruno Van Swinderen. Evidence for selective attention in the insect brain. *Current Opinion in Insect Science*, 15:1–7, 2016. [doi:10.1016/j.cois.2016.02.007](https://doi.org/10.1016/j.cois.2016.02.007)." # noqa: E501 + ) + + # Test citation with URL + citation = Citation("test_citavi", "", "") + expected = "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019. URL: [\\\\url\\{https://doi.org/10.21577/0103\\-5053.20190253\\}](\\url{https://doi.org/10.21577/0103-5053.20190253})." 
+ assert simple_registry.reference_text(citation) == expected diff --git a/test_files/test_utils.py b/test_files/test_utils.py index f0bcf38..cc6bd69 100644 --- a/test_files/test_utils.py +++ b/test_files/test_utils.py @@ -1,84 +1,43 @@ -import os - import pytest -from mkdocs_bibtex.utils import ( - find_cite_blocks, - format_simple, - format_pandoc, - extract_cite_keys, - sanitize_zotero_query, -) - -from mkdocs_bibtex.plugin import parse_file - -module_dir = os.path.dirname(os.path.abspath(__file__)) -test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) - - -@pytest.fixture -def entries(): - bibdata = parse_file(os.path.join(test_files_dir, "test.bib")) - return bibdata.entries - - -def test_find_cite_blocks(): - - # Suppressed authors - assert find_cite_blocks("[-@test]") == ["[-@test]"] - # Affixes - assert find_cite_blocks("[see @test]") == ["[see @test]"] - assert find_cite_blocks("[@test, p. 15]") == ["[@test, p. 15]"] - assert find_cite_blocks("[see @test, p. 15]") == ["[see @test, p. 15]"] - assert find_cite_blocks("[see -@test, p. 15]") == ["[see -@test, p. 15]"] - # Invalid blocks - assert find_cite_blocks("[ @test]") is not True - # Citavi . format - assert find_cite_blocks("[@Bermudez.2020]") == ["[@Bermudez.2020]"] - - -def test_format_simple(entries): - citations = format_simple(entries) - - assert all(k in citations for k in entries) - assert all(entry != citations[k] for k, entry in entries.items()) - - assert ( - citations["test"] - == "First Author and Second Author. Test title. *Testing Journal*, 2019." - ) - assert ( - citations["test2"] - == "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019." 
- ) - - -def test_format_pandoc(entries): - citations = format_pandoc(entries, os.path.join(test_files_dir, "nature.csl")) - - assert all(k in citations for k in entries) - assert all(entry != citations[k] for k, entry in entries.items()) +from mkdocs_bibtex.utils import sanitize_zotero_query, tempfile_from_zotero_url +import collections.abc +import os +import random +import string - assert ( - citations["test"] - == "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019)." - ) - assert ( - citations["test2"] - == "Author, F. & Author, S. Test Title (TT). *Testing Journal (TJ)* **1**, (2019)." - ) +import responses +from pybtex.database import parse_file +EXAMPLE_ZOTERO_API_ENDPOINT = "https://api.zotero.org/groups/FOO/collections/BAR/items" -def test_extract_cite_key(): - """ - Test to ensure the extract regex can handle all bibtex keys - TODO: Make this fully compliant with bibtex keys allowed characters - """ - assert extract_cite_keys("[@test]") == ["test"] - assert extract_cite_keys("[@test.3]") == ["test.3"] +MOCK_ZOTERO_URL = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex" -EXAMPLE_ZOTERO_API_ENDPOINT = "https://api.zotero.org/groups/FOO/collections/BAR/items" +@pytest.fixture +def mock_zotero_api(request: pytest.FixtureRequest) -> collections.abc.Generator[responses.RequestsMock]: + zotero_api_url = "https://api.zotero.org/groups/FOO/collections/BAR/items?format=bibtex&limit=100" + bibtex_contents = generate_bibtex_entries(request.param) + + limit = 100 + pages = [bibtex_contents[i : i + limit] for i in range(0, len(bibtex_contents), limit)] + + with responses.RequestsMock() as mock_api: + for page_num, page in enumerate(pages): + current_start = "" if page_num == 0 else f"&start={page_num * limit}" + next_start = f"&start={(page_num + 1) * limit}" + mock_api.add( + responses.Response( + method="GET", + url=f"{zotero_api_url}{current_start}", + json="\n".join(page), + headers={} + if page_num == len(pages) - 1 + else 
{"Link": f"<{zotero_api_url}{next_start}>; rel='next'"}, + ) + ) + + yield mock_api @pytest.mark.parametrize( @@ -105,3 +64,38 @@ def test_extract_cite_key(): ) def test_sanitize_zotero_query(zotero_url: str, expected_sanitized_url: str) -> None: assert sanitize_zotero_query(url=zotero_url) == expected_sanitized_url + + +@pytest.mark.parametrize(("mock_zotero_api", "number_of_entries"), ((4, 4), (150, 150)), indirect=["mock_zotero_api"]) +def test_bibtex_loading_zotero(mock_zotero_api: responses.RequestsMock, number_of_entries: int) -> None: + bib_file = tempfile_from_zotero_url("Bib File", MOCK_ZOTERO_URL, ".bib") + + assert os.path.exists(bib_file) + assert os.path.getsize(bib_file) > 0 + + bibdata = parse_file(bib_file) + + assert len(bibdata.entries) == number_of_entries + + +def generate_bibtex_entries(n: int) -> list[str]: + """Generates n random bibtex entries.""" + + entries = [] + + for i in range(n): + author_first = "".join(random.choices(string.ascii_letters, k=8)) + author_last = "".join(random.choices(string.ascii_letters, k=8)) + title = "".join(random.choices(string.ascii_letters, k=10)) + journal = "".join(random.choices(string.ascii_uppercase, k=5)) + year = str(random.randint(1950, 2025)) + + entries.append(f""" +@article{{{author_last}_{i}}}, + title = {{{title}}}, + volume = {{1}}, + journal = {{{journal}}}, + author = {{{author_last}, {author_first}}}, + year = {{{year}}}, +""") + return entries