Skip to content

Commit

Permalink
UBXFConverter: ignore footnote content
Browse files Browse the repository at this point in the history
When running `createsrcloc` or `analyze`, ignore `\w` tags inside
footnotes. UHB uses these tags to denote alternative readings, so they
should not be counted as source locations.

While we are at it, make sure that AugmentGrammar's `dump` and
`dumpwords` options output UTF-8.
  • Loading branch information
schierlm committed Jan 8, 2025
1 parent b51e0b9 commit ca1565e
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -59,7 +61,7 @@ public void doExport(Bible bible, String... exportArgs) throws Exception {
}
if (exportArgs[0].equals("dump")) {
boolean humanStrongs = exportArgs.length > 2 && exportArgs[2].equals("humanStrongs");
try (BufferedWriter bw = new BufferedWriter(new FileWriter(exportArgs[1]))) {
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(exportArgs[1]), StandardCharsets.UTF_8))) {
runOperation(bible, new GrammarOperation() {

private int counter = 0;
Expand Down Expand Up @@ -88,7 +90,7 @@ public Visitor<RuntimeException> handleGrammar(Reference reference, Visitor<Runt
}
} else if (exportArgs[0].equals("dumpwords")) {
boolean humanStrongs = exportArgs.length > 2 && exportArgs[2].equals("humanStrongs");
try (BufferedWriter bw = new BufferedWriter(new FileWriter(exportArgs[1]))) {
try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(exportArgs[1]), StandardCharsets.UTF_8))) {
runOperation(bible, new GrammarOperation() {

private int counter = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import biblemulticonverter.format.paratext.ParatextBook.ParatextID;
import biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormatting;
import biblemulticonverter.format.paratext.ParatextCharacterContent.AutoClosingFormattingKind;
import biblemulticonverter.format.paratext.ParatextCharacterContent.FootnoteXref;
import biblemulticonverter.format.paratext.ParatextCharacterContent.Milestone;
import biblemulticonverter.format.paratext.ParatextCharacterContent.ParatextCharacterContentPart;
import biblemulticonverter.format.paratext.ParatextCharacterContent.Reference;
Expand Down Expand Up @@ -74,7 +75,7 @@ public void doExportBooks(List<ParatextBook> books, String... exportArgs) throws
if (exportArgs[0].equals("createsrcloc") && exportArgs[2].equals("--")) {
final String prefix = exportArgs[1];
for (ParatextBook book : books) {
book.accept(new UBXFBookVisitor(book.getId(), new UBXFGrammarHandlerVisitor() {
book.accept(new UBXFBookVisitor(book.getId(), false, new UBXFGrammarHandlerVisitor() {
int index;

@Override
Expand All @@ -96,7 +97,7 @@ protected void handleWordlist(AutoClosingFormatting acf, Reference where) {
Map<String, Integer> occurrences = new HashMap<>();
Map<String, String> words = new HashMap<>();
for (ParatextBook book : books) {
book.accept(new UBXFBookVisitor(book.getId(), new UBXFGrammarHandlerVisitor() {
book.accept(new UBXFBookVisitor(book.getId(), false, new UBXFGrammarHandlerVisitor() {
private void extractContent(StringBuilder sb, ParatextCharacterContentContainer pccc) {
for (ParatextCharacterContentPart part : pccc.getContent()) {
if (part instanceof ParatextCharacterContentContainer) {
Expand Down Expand Up @@ -156,7 +157,7 @@ protected void handleWordlist(AutoClosingFormatting acf, Reference where) {
props.load(fis);
}
for (ParatextBook book : books) {
book.accept(new UBXFBookVisitor(book.getId(), new UBXFGrammarHandlerVisitor() {
book.accept(new UBXFBookVisitor(book.getId(), true, new UBXFGrammarHandlerVisitor() {
@Override
protected void handleAlignMilestone(boolean start, Milestone milestone, Reference where) {
if (!start)
Expand Down Expand Up @@ -187,7 +188,7 @@ protected void handleWordlist(AutoClosingFormatting acf, Reference where) {
} else if ((exportArgs[0].equals("fillwordattr") || exportArgs[0].equals("createwordattr")) && exportArgs[1].equals("--")) {
final boolean restructure = exportArgs[0].equals("createwordattr");
for (ParatextBook book : books) {
book.accept(new UBXFBookVisitor(book.getId(), new UBXFGrammarHandlerVisitor() {
book.accept(new UBXFBookVisitor(book.getId(), true, new UBXFGrammarHandlerVisitor() {
private void restructure(ParatextCharacterContentContainer pccc, boolean inWordlist, boolean inNewWordlist) {
for (int i = 0; i < pccc.getContent().size(); i++) {
ParatextCharacterContentPart part = pccc.getContent().get(i);
Expand Down Expand Up @@ -243,11 +244,11 @@ private void restructure(ParatextCharacterContentContainer pccc, boolean inWordl
}

@Override
protected void handleContent(ParatextCharacterContentContainer pccc) {
protected void handleContent(ParatextCharacterContentContainer pccc, boolean enterFootnotes) {
if (restructure) {
restructure(pccc, false, false);
}
super.handleContent(pccc);
super.handleContent(pccc, enterFootnotes);
}

List<Milestone> openMilestones = new ArrayList<>();
Expand Down Expand Up @@ -289,7 +290,7 @@ protected void handleWordlist(AutoClosingFormatting acf, Reference where) {
formatArg = 2;
} else if (exportArgs[0].equals("convertgrammar") && exportArgs[1].equals("--")) {
for (ParatextBook book : books) {
book.accept(new UBXFBookVisitor(book.getId(), new UBXFGrammarHandlerVisitor() {
book.accept(new UBXFBookVisitor(book.getId(), true, new UBXFGrammarHandlerVisitor() {

private String getHebPrefixStrong(char ch) {
switch (ch) {
Expand Down Expand Up @@ -435,10 +436,12 @@ private static class UBXFBookVisitor implements ParatextBookContentVisitor<Runti

private final ParatextID bookID;
private final UBXFGrammarHandlerVisitor ghv;
private final boolean enterFootnotes;
private int chapterNumber = -1;

public UBXFBookVisitor(ParatextID bookID, UBXFGrammarHandlerVisitor ghv) {
public UBXFBookVisitor(ParatextID bookID, boolean enterFootnotes, UBXFGrammarHandlerVisitor ghv) {
this.bookID = bookID;
this.enterFootnotes = enterFootnotes;
this.ghv = ghv;
}

Expand Down Expand Up @@ -490,7 +493,7 @@ public void visitFigure(String caption, Map<String, String> attributes) throws R

@Override
public void visitParatextCharacterContent(ParatextCharacterContent content) throws RuntimeException {
ghv.handleContent(content);
ghv.handleContent(content, enterFootnotes);
}
}

Expand All @@ -507,7 +510,7 @@ public void setWhere(Reference newWhere) {
/**
 * Hook called for alignment milestones found while walking character content.
 * This default implementation intentionally does nothing; handlers that care
 * about alignment milestones override it.
 *
 * @param start     whether the milestone opens ({@code true}) or closes
 *                  ({@code false}) an alignment span (presumed from the
 *                  parameter name and the {@code false} call site; TODO confirm)
 * @param milestone the milestone part that was encountered
 * @param where     the reference passed along by the caller (presumably the
 *                  current verse location; verify against {@code setWhere})
 */
protected void handleAlignMilestone(boolean start, Milestone milestone, Reference where) {
}

protected void handleContent(ParatextCharacterContentContainer pccc) {
protected void handleContent(ParatextCharacterContentContainer pccc, boolean enterFootnotes) {
for (ParatextCharacterContentPart part : pccc.getContent()) {
if (part instanceof AutoClosingFormatting && ((AutoClosingFormatting) part).getKind() == AutoClosingFormattingKind.WORDLIST) {
handleWordlist((AutoClosingFormatting) part, where);
Expand All @@ -519,8 +522,8 @@ protected void handleContent(ParatextCharacterContentContainer pccc) {
handleAlignMilestone(false, milestone, where);
}
}
if (part instanceof ParatextCharacterContentContainer) {
handleContent((ParatextCharacterContentContainer) part);
if (part instanceof ParatextCharacterContentContainer && (enterFootnotes || !(part instanceof FootnoteXref))) {
handleContent((ParatextCharacterContentContainer) part, enterFootnotes);
}
}
}
Expand Down

0 comments on commit ca1565e

Please sign in to comment.