Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
zepinglee committed Nov 1, 2023
1 parent d14ec02 commit 867cc0d
Show file tree
Hide file tree
Showing 15 changed files with 308 additions and 118 deletions.
36 changes: 35 additions & 1 deletion citeproc/citeproc-latex-parser.lua
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ function latex_parser.latex_to_pseudo_html(str, strict, case_protection)
latex_parser.convert_ast_to_unicode(ast)
local inlines = latex_parser.convert_tokens_to_inlines(ast, strict, case_protection)
-- util.debug(inlines)
if not case_protection then
-- util.debug(inlines)
inlines = latex_parser.remove_case_protection(inlines, false)
-- util.debug(inlines)
end

local pseudo_html_format = markup.PseudoHtml:new()
local res = pseudo_html_format:write_inlines(inlines, {})
return res
Expand All @@ -69,10 +75,18 @@ end
function latex_parser.latex_to_sentence_case_pseudo_html(str, keep_unknown_commands, case_protection, check_sentence_case)
  -- Parses `str` as LaTeX, sentence-cases the resulting inlines and returns
  -- them serialised by `markup.PseudoHtml`.
  --
  -- `keep_unknown_commands` is forwarded to `convert_tokens_to_inlines`,
  -- `check_sentence_case` is forwarded to `convert_sentence_case`, and a falsy
  -- `case_protection` strips the NoCase wrappers from the final output.
  local ast = latex_parser.latex_grammar:match(str)
  latex_parser.convert_ast_to_unicode(ast)

  -- The inlines are always built with case protection enabled, regardless of
  -- the caller's `case_protection`; presumably so that the sentence-case pass
  -- can still see the protected spans. The protection is removed afterwards
  -- when the caller did not ask for it.
  local inlines = latex_parser.convert_tokens_to_inlines(ast, keep_unknown_commands, true)

  local pseudo_html_format = markup.PseudoHtml:new()
  pseudo_html_format:convert_sentence_case(inlines, check_sentence_case)

  if not case_protection then
    inlines = latex_parser.remove_case_protection(inlines, false)
  end

  return pseudo_html_format:write_inlines(inlines, {})
end
Expand Down Expand Up @@ -398,6 +412,26 @@ function latex_parser.convert_group_to_inlines(token, strict, case_protection, f
end


---Strip every `NoCase` wrapper from an inline tree, splicing the wrapper's
---children into the position the wrapper occupied.
---
---Containers other than `NoCase` are kept, but their `inlines` field is
---replaced in place by the stripped child list.
---@param inlines InlineElement[]
---@param keep_minimal boolean Currently unused; only forwarded to recursive calls.
---@return InlineElement[] # A new list; the input list itself is not modified.
function latex_parser.remove_case_protection(inlines, keep_minimal)
  local res = {}
  for _, inline in ipairs(inlines) do
    -- Clean the children first, for NoCase nodes as well, so that protection
    -- nested below another NoCase wrapper is removed too.
    if inline.inlines then
      inline.inlines = latex_parser.remove_case_protection(inline.inlines, keep_minimal)
    end

    if inline._type == "NoCase" then
      -- A NoCase node without children contributes nothing to the result.
      for _, child in ipairs(inline.inlines or {}) do
        res[#res + 1] = child
      end
    else
      res[#res + 1] = inline
    end
  end
  return res
end


---A group is surrounded with braces in these cases:
---1. Following a command; e.g., `\textbf{foo}` and `\textcolor{red}{flag}`
---2. Containing a command; e.g., `{\bfseries foo}`
Expand Down
2 changes: 1 addition & 1 deletion citeproc/citeproc-output.lua
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ function OutputFormat:apply_text_case_inner(inlines, text_case, seen_one, is_upp
local is_last = (i == #inlines)
if inline._type == "PlainText" then
-- util.debug(inline.value)
-- util.debug(text_case)
-- util.debug(seen_one)
inline.value = self:transform_case(inline.value, text_case, seen_one, is_last, is_uppercase);
-- util.debug(inline.value)
seen_one = seen_one or string_contains_word(inline.value)
Expand Down
156 changes: 156 additions & 0 deletions tests/bbt-test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
-- Module handles used by the tests. They are filled in below because the
-- module names and the JSON library differ depending on whether the global
-- `kpse` is defined.
local bibtex_data
local bibtex_parser
local bibtex2csl
local json_encode
local json_decode
local latex_parser
local util
if kpse then
  -- `kpse` is defined: presumably running under LuaTeX/texlua.
  kpse.set_program_name("luatex")
  local kpse_searcher = package.searchers[2]
  -- Look a module up on `package.path` first and only fall back to the
  -- original second searcher when it is not found there.
  ---@diagnostic disable-next-line: duplicate-set-field
  package.searchers[2] = function (pkg_name)
    local pkg_file = package.searchpath(pkg_name, package.path)
    if pkg_file then
      return loadfile(pkg_file)
    end
    return kpse_searcher(pkg_name)
  end
  bibtex_data = require("citeproc-bibtex-data")
  bibtex_parser = require("citeproc-bibtex-parser")
  bibtex2csl = require("citeproc-bibtex2csl")
  -- Loading "lualibs" makes the global `utilities.json` table available.
  require("lualibs")
  json_encode = utilities.json.tojson
  json_decode = utilities.json.tolua
  latex_parser = require("citeproc-latex-parser")
  util = require("citeproc-util")
else
  bibtex_data = require("citeproc.bibtex-data")
  bibtex_parser = require("citeproc.bibtex-parser")
  bibtex2csl = require("citeproc.bibtex2csl")
  local dkjson = require("dkjson")
  -- Fixed: the encoder used to be assigned to `json_decode`, which left
  -- `json_encode` nil in this branch.
  json_encode = dkjson.encode
  json_decode = dkjson.decode
  latex_parser = require("citeproc.latex-parser")
  util = require("citeproc.util")
end


---List the entries of a directory whose names do not start with a dot,
---sorted with `table.sort`.
---@param path string
---@return string[]
local function listdir(path)
  local names = {}
  for entry in lfs.dir(path) do
    local is_dot_entry = string.match(entry, "^%.") ~= nil
    if not is_dot_entry then
      names[#names + 1] = entry
    end
  end
  table.sort(names)
  return names
end


-- Fields whose values are sentence-cased and compared against the baseline.
local sentence_case_fields = {
  title = true,
  subtitle = true,
  shorttitle = true,
  booktitle = true,
  booksubtitle = true,
  issuetitle = true,
  issuesubtitle = true,
  maintitle = true,
  mainsubtitle = true,
  eventtitle = true,
  origtitle = true,
  series = true,
  type = true,
}


---Register one `describe` block per entry of a BibTeX database and, inside
---it, one `it` test per title-like field. Each test converts the field with
---`latex_parser.latex_to_sentence_case_pseudo_html` and compares the result
---with the same field of the same-index entry in the JSON baseline.
---
---Fields typed "name" or "date" in `bibtex_data.fields` are never tested.
---Nothing is tested when `sentence_case_title` is falsy or when the entry
---type is "jurisdiction".
---@param bib_contents string Contents of the `.bib` file.
---@param sentence_case_title boolean Enables the sentence-case tests.
---@param check_sentence_case boolean Forwarded to the converter.
---@param case_protection boolean Forwarded to the converter.
---@param baseline_path string Path of the JSON baseline file.
local function test_bib_json(bib_contents, sentence_case_title, check_sentence_case, case_protection, baseline_path)
  local bib_data = bibtex_parser.parse(bib_contents, {})
  -- A failed parse is treated as an empty database.
  local entries = bib_data and bib_data.entries or {}

  local baseline_content = util.read_file(baseline_path)
  if not baseline_content then
    error(string.format('File not found: "%s"', baseline_path))
  end
  local baseline = json_decode(baseline_content)

  for i, entry in ipairs(entries) do

    describe("#" .. entry.key, function ()

      for field, value in pairs(entry.fields) do

        local field_info = bibtex_data.fields[field]
        local field_type = field_info and field_info.type or nil

        local is_tested = field_type ~= "name"
          and field_type ~= "date"
          and sentence_case_title
          and entry.type ~= "jurisdiction"
          and sentence_case_fields[field]

        if is_tested then
          -- NOTE(review): busted presumably runs this callback only after the
          -- enclosing `describe` body has finished, so `value` read here is
          -- the result of the `gsub` below — confirm that this is intended.
          it("#" .. field, function ()
            local actual = latex_parser.latex_to_sentence_case_pseudo_html(value, true, case_protection, check_sentence_case)
            local baseline_entry = baseline.entries[i]
            if not baseline_entry then
              error(string.format('No baseline entry #%d (%s) in "%s"', i, entry.key, baseline_path))
            end
            assert.equal(baseline_entry.fields[field], actual)
          end)
        end

        -- Merge adjacent nocase spans that are separated only by digits,
        -- punctuation or whitespace. Only the first result of `gsub` is kept.
        value = string.gsub(value, '</span>([%d%p%s]*)<span class="nocase">', "%1")
        entry.fields[field] = value
      end

    end)

  end

end


---Register the test suites: one `describe` per `.bib` file found in
---`./tests/bbt/bib` and, inside it, one `describe` per combination of the
---sentence-case and case-protection options. The baseline of a combination
---is read from `./tests/bbt/baseline/<config>/<file>.json`.
local function main()
  local bib_dir = "./tests/bbt/bib"
  local sentence_case_options = {"on+guess", "on", "off"}
  local case_protection_options = {"as-needed", "strict", "off"}

  for _, file in ipairs(listdir(bib_dir)) do
    if string.match(file, "%.bib$") then

      -- `gsub` also returns the number of substitutions; assigning to a
      -- single local keeps that count out of `string.format` below.
      local json_name = string.gsub(file, "%.bib$", ".json")

      describe(file, function ()
        local bib_path = bib_dir .. "/" .. file
        local bib_contents = util.read_file(bib_path)
        if not bib_contents then
          error(string.format('File not found: "%s"', bib_path))
        end

        for _, sentence_case in ipairs(sentence_case_options) do
          for _, caseprotection in ipairs(case_protection_options) do
            local config = string.format("sentencecase=%s^caseprotection=%s", sentence_case, caseprotection)
            local sentence_case_title = (sentence_case ~= "off")
            local case_protection = (caseprotection ~= "off")
            local check_sentence_case = (sentence_case == "on+guess")

            local baseline_path = string.format("./tests/bbt/baseline/%s/%s", config, json_name)

            describe(config, function ()
              test_bib_json(bib_contents, sentence_case_title, check_sentence_case, case_protection, baseline_path)
            end)

          end
        end
      end)
    end
  end
end


main()
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"title": "<i>The Good Place</i> ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "The good place ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"title": "<i>The Good Place</i> ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"title": "<i>The Good Place</i> ends its Remarkable Second Season With Irrational Hope, Unexpected Gifts, and a Smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "The good place ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "The good place ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1986,7 +1986,7 @@
"related": "10.7:69r",
"relatedstring": "Review of the TV series episode",
"relatedtype": "reviewof",
"title": "<span class=\"nocase\">The good place</span> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"title": "<i>The good place</i> ends its remarkable second season with irrational hope, unexpected gifts, and a smile",
"url": "https://www.avclub.com/the-good-place-ends-its-remarkable-second-season-with-i-1822649316"
}
},
Expand Down
2 changes: 1 addition & 1 deletion tests/bbt/bib/biblatex-apa-test-references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -1676,7 +1676,7 @@ @BOOK{10.7:68r
% emphasise" part of the title
@ONLINE{10.7:69,
AUTHOR = {D. Perkins},
TITLE = {\textup{The good place} ends its Remarkable Second
TITLE = {{\emph{The Good Place}} ends its Remarkable Second
Season With Irrational Hope, Unexpected Gifts, and a Smile},
RELATED = {10.7:69r},
RELATEDTYPE = {reviewof},
Expand Down
Loading

0 comments on commit 867cc0d

Please sign in to comment.