Skip to content

Commit

Permalink
bring formatting to utils
Browse files Browse the repository at this point in the history
  • Loading branch information
shyamd committed Jan 20, 2022
1 parent 4461c8d commit f5193b3
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 46 deletions.
131 changes: 89 additions & 42 deletions mkdocs_bibtex/utils.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,112 @@
import re
import tempfile
from collections import OrderedDict
from itertools import groupby
from pathlib import Path

import pypandoc
from pybtex.backends.markdown import Backend as MarkdownBackend
from pybtex.database import BibliographyData
from pybtex.style.formatting.plain import Style as PlainStyle


def to_markdown_pandoc(entry, csl_path):
def format_simple(entries):
    """
    Render every entry with pybtex's built-in plain style as markdown
    Args:
        entries (dict): mapping of citation key to bibliography entry
    Returns:
        references (dict): ordered mapping of citation key to the
            single-line markdown text produced for that entry
    """
    plain_style = PlainStyle()
    markdown_backend = MarkdownBackend()

    def _render(entry):
        # The style is given an empty label; only the entry text is used.
        rendered = plain_style.format_entry("", entry).text.render(markdown_backend)
        # Collapse the rendered text onto a single line.
        return rendered.replace("\n", " ")

    return OrderedDict((key, _render(entry)) for key, entry in entries.items())


def format_pandoc(entries, csl_path):
    """
    Render every entry through pandoc, one entry at a time
    Args:
        entries (dict): mapping of citation key to bibliography entry
        csl_path (str): path to the CSL file used for formatting
    Returns:
        references (dict): ordered mapping of citation key to citation text
    """
    version_parts = pypandoc.get_pandoc_version().split(".")
    pandoc_version = tuple(int(part) for part in version_parts)

    # The installed pandoc version does not change between entries, so the
    # conversion routine is picked once up front.
    if pandoc_version >= (2, 11):
        convert = _convert_pandoc_new
    else:
        convert = _convert_pandoc_legacy

    citations = OrderedDict()
    for key, entry in entries.items():
        # Each entry is serialised as its own one-entry BibTeX database.
        single_entry_db = BibliographyData(entries={entry.key: entry})
        citations[key] = convert(single_entry_db.to_string("bibtex"), csl_path)
    return citations


def _convert_pandoc_new(bibtex_string, csl_path):
    """
    Render a BibTeX string as markdown citation text
    using pandoc version 2.11 or newer
    Args:
        bibtex_string (str): BibTeX source handed to pandoc as input text
        csl_path (str): path to the CSL file passed to pandoc via --csl
    Returns:
        citation (str): the text captured between the ``{.csl-left-margin}[``
            and ``]{.csl-right-inline}`` markers, or the whole trimmed pandoc
            output when those markers are not present
    """
    pandoc_args = ["--citeproc", "--csl", csl_path]
    rendered = pypandoc.convert_text(
        source=bibtex_string,
        to="markdown-citations",
        format="bibtex",
        extra_args=pandoc_args,
    )

    # Drop the first two and last two lines (expected to be the pandoc
    # preamble and the closing triple colons), then flatten to one line.
    body_lines = rendered.split("\n")[2:-2]
    flattened = " ".join(body_lines)

    margin_pattern = re.compile(r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}")
    found = margin_pattern.search(flattened)
    if found is None:
        # No margin/inline markers: fall back to the flattened output.
        return flattened
    return found.group(1)

# This should cut off the pandoc preamble and ending triple colons
markdown = " ".join(markdown.split("\n")[2:-2])

citation_regex = re.compile(
r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}"
)
try:

citation = citation_regex.findall(markdown)[0]
except IndexError:
citation = markdown
else:
# Older citeproc-filter version of pandoc
with tempfile.TemporaryDirectory() as tmpdir:
bib_path = Path(tmpdir).joinpath("temp.bib")
with open(bib_path, "w") as bibfile:
bibfile.write(bibtex_string)
citation_text = """
def _convert_pandoc_legacy(bibtex_string, csl_file):
    """
    Converts a BibTeX string into formatted markdown citation text
    using pandoc version older than 2.11 (external pandoc-citeproc filter)
    Args:
        bibtex_string (str): BibTeX source of the entry to format
        csl_file (str): path to the CSL file passed to pandoc via --csl
    Returns:
        citation (str): the citation text on a single line, with a leading
            "1." list marker and surrounding whitespace removed
    """
    # The document body is empty; the ``nocite: '@*'`` metadata asks the
    # citeproc filter to list every entry of the bibliography file.
    citation_text = """
---
nocite: '@*'
---
"""

    with tempfile.TemporaryDirectory() as tmpdir:
        bib_path = Path(tmpdir).joinpath("temp.bib")
        # pandoc reads its input files as UTF-8, so do not rely on the
        # platform's default encoding when writing the bibliography.
        bib_path.write_text(bibtex_string, encoding="utf-8")

        # The conversion must run while the temporary directory still exists.
        markdown = pypandoc.convert_text(
            source=citation_text,
            to="markdown_strict",
            format="md",
            # Use the ``csl_file`` parameter (the body previously referenced
            # an undefined ``csl_path``) and pass the path as a plain string.
            extra_args=["--csl", csl_file, "--bibliography", str(bib_path)],
            filters=["pandoc-citeproc"],
        )

    # Non-capturing group for the optional list marker, so that the single
    # remaining group is the citation text itself rather than a
    # (marker, text) tuple.
    citation_regex = re.compile(r"(?:1\.)?(.*)")
    # Every part of the pattern is optional, so ``search`` always matches.
    match = citation_regex.search(markdown.replace("\n", " "))
    return match.group(1).strip()


Expand Down
53 changes: 49 additions & 4 deletions test_files/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@

import pytest

from mkdocs_bibtex.plugin import BibTexPlugin
from mkdocs_bibtex.utils import (find_cite_keys, format_bibliography,
insert_citation_keys)
from mkdocs_bibtex.plugin import BibTexPlugin, parse_file
from mkdocs_bibtex.utils import (
find_cite_keys,
format_bibliography,
insert_citation_keys,
format_simple,
format_pandoc,
)

module_dir = os.path.dirname(os.path.abspath(__file__))
test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files"))
Expand All @@ -21,6 +26,12 @@ def plugin():
return plugin


@pytest.fixture
def entries():
    """Entries parsed from the ``test.bib`` file in the test files directory."""
    bib_file = os.path.join(test_files_dir, "test.bib")
    return parse_file(bib_file).entries


def test_bibtex_loading_bibfile(plugin):
assert len(plugin.bib_data.entries) == 3

Expand Down Expand Up @@ -180,4 +191,38 @@ def test_format_bibliography():
assert (
"[^2]: First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019"
in bib
)
)


def test_format_simple(entries):
    """format_simple returns one rendered citation per entry key."""
    citations = format_simple(entries)

    # Every entry key is present in the result ...
    for key in entries:
        assert key in citations
    # ... and each value differs from the entry object it was built from.
    for key, entry in entries.items():
        assert entry != citations[key]

    print(citations)

    expected_test = (
        "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\."
    )
    expected_test2 = "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\."
    assert citations["test"] == expected_test
    assert citations["test2"] == expected_test2


def test_format_pandoc(entries):
    """format_pandoc renders each entry with the ``nature.csl`` style."""
    csl_file = os.path.join(test_files_dir, "nature.csl")
    citations = format_pandoc(entries, csl_file)

    # Every entry key is present in the result ...
    for key in entries:
        assert key in citations
    # ... and each value differs from the entry object it was built from.
    for key, entry in entries.items():
        assert entry != citations[key]

    print(citations)

    expected_test = "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019)."
    expected_test2 = "Author, F. & Author, S. Test Title (TT). *Testing Journal (TJ)* **1**, (2019)."
    assert citations["test"] == expected_test
    assert citations["test2"] == expected_test2

0 comments on commit f5193b3

Please sign in to comment.