Skip to content

Commit

Permalink
Also include "input index" in SourceSpan
Browse files Browse the repository at this point in the history
The existing line/column indexes in `SourceSpan` are useful for some cases,
e.g. editors that are line based. But for other cases, it's useful to be able
to get the index within the original input string.

An example: If the input string is "foo\n\nbar", the "bar" paragraph has
the following `SourceSpan`: line 2 (third line), column 0, length 3.
With this change, it now also includes the input index: 5 ("b" is the character
at index 5 in the string). That means it's possible to use e.g. `substring`
instead of having to split the input text into lines first.
  • Loading branch information
robinst committed Oct 16, 2024
1 parent 6e93f85 commit a6b3daa
Show file tree
Hide file tree
Showing 17 changed files with 425 additions and 262 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ private static Text createTextNode(String literal, Span span, SourceSpan sourceS
String text = literal.substring(beginIndex, endIndex);
Text textNode = new Text(text);
if (sourceSpan != null) {
int length = endIndex - beginIndex;
textNode.addSourceSpan(SourceSpan.of(sourceSpan.getLineIndex(), beginIndex, length));
textNode.addSourceSpan(sourceSpan.subSpan(beginIndex, endIndex));
}
return textNode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,43 +71,43 @@ public void sourceSpans() {

Paragraph paragraph = (Paragraph) document.getFirstChild();
Text abc = (Text) paragraph.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 3)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)),
abc.getSourceSpans());

assertTrue(abc.getNext() instanceof SoftLineBreak);

Link one = (Link) abc.getNext().getNext();
assertEquals("http://example.com/one", one.getDestination());
assertEquals(List.of(SourceSpan.of(1, 0, 22)),
assertEquals(List.of(SourceSpan.of(1, 0, 4, 22)),
one.getSourceSpans());

assertTrue(one.getNext() instanceof SoftLineBreak);

Text def = (Text) one.getNext().getNext();
assertEquals("def ", def.getLiteral());
assertEquals(List.of(SourceSpan.of(2, 0, 4)),
assertEquals(List.of(SourceSpan.of(2, 0, 27, 4)),
def.getSourceSpans());

Link two = (Link) def.getNext();
assertEquals("http://example.com/two", two.getDestination());
assertEquals(List.of(SourceSpan.of(2, 4, 22)),
assertEquals(List.of(SourceSpan.of(2, 4, 31, 22)),
two.getSourceSpans());

assertTrue(two.getNext() instanceof SoftLineBreak);

Text ghi = (Text) two.getNext().getNext();
assertEquals("ghi ", ghi.getLiteral());
assertEquals(List.of(SourceSpan.of(3, 0, 4)),
assertEquals(List.of(SourceSpan.of(3, 0, 54, 4)),
ghi.getSourceSpans());

Link three = (Link) ghi.getNext();
assertEquals("http://example.com/three", three.getDestination());
assertEquals(List.of(SourceSpan.of(3, 4, 24)),
assertEquals(List.of(SourceSpan.of(3, 4, 58, 24)),
three.getSourceSpans());

Text jkl = (Text) three.getNext();
assertEquals(" jkl", jkl.getLiteral());
assertEquals(List.of(SourceSpan.of(3, 28, 4)),
assertEquals(List.of(SourceSpan.of(3, 28, 82, 4)),
jkl.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,10 @@ public void testSourcePositions() {

var doc = parser.parse("Test [^foo]\n\n[^foo]: /url\n");
var ref = find(doc, FootnoteReference.class);
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 6)));
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 5, 6)));

var def = find(doc, FootnoteDefinition.class);
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 12)));
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 13, 12)));
}

private static <T> T find(Node parent, Class<T> nodeClass) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public void sourceSpans() {
Node document = parser.parse("hey ~~there~~\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node strikethrough = block.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
strikethrough.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -791,45 +791,45 @@ public void sourceSpans() {
Node document = parser.parse("Abc|Def\n---|---\n|1|2\n 3|four|\n|||\n");

TableBlock block = (TableBlock) document.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7), SourceSpan.of(1, 0, 7),
SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 7),
SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)),
block.getSourceSpans());

TableHead head = (TableHead) block.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7)), head.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), head.getSourceSpans());

TableRow headRow = (TableRow) head.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7)), headRow.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), headRow.getSourceSpans());
TableCell headRowCell1 = (TableCell) headRow.getFirstChild();
TableCell headRowCell2 = (TableCell) headRow.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());

TableBody body = (TableBody) block.getLastChild();
assertEquals(List.of(SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)), body.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)), body.getSourceSpans());

TableRow bodyRow1 = (TableRow) body.getFirstChild();
assertEquals(List.of(SourceSpan.of(2, 0, 4)), bodyRow1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4)), bodyRow1.getSourceSpans());
TableCell bodyRow1Cell1 = (TableCell) bodyRow1.getFirstChild();
TableCell bodyRow1Cell2 = (TableCell) bodyRow1.getLastChild();
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());

TableRow bodyRow2 = (TableRow) body.getFirstChild().getNext();
assertEquals(List.of(SourceSpan.of(3, 0, 8)), bodyRow2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 0, 21, 8)), bodyRow2.getSourceSpans());
TableCell bodyRow2Cell1 = (TableCell) bodyRow2.getFirstChild();
TableCell bodyRow2Cell2 = (TableCell) bodyRow2.getLastChild();
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());

TableRow bodyRow3 = (TableRow) body.getLastChild();
assertEquals(List.of(SourceSpan.of(4, 0, 3)), bodyRow3.getSourceSpans());
assertEquals(List.of(SourceSpan.of(4, 0, 30, 3)), bodyRow3.getSourceSpans());
TableCell bodyRow3Cell1 = (TableCell) bodyRow3.getFirstChild();
TableCell bodyRow3Cell2 = (TableCell) bodyRow3.getLastChild();
assertEquals(List.of(), bodyRow3Cell1.getSourceSpans());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public void sourceSpans() {
Node document = parser.parse("x{height=3 width=4}\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node text = block.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 19)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 19)),
text.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public void sourceSpans() {
Node document = parser.parse("hey ++there++\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node ins = block.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
ins.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.commonmark.internal;

import org.commonmark.internal.util.LineReader;
import org.commonmark.internal.util.Parsing;
import org.commonmark.node.*;
import org.commonmark.parser.IncludeSourceSpans;
Expand Down Expand Up @@ -127,7 +128,7 @@ public Document parse(String input) {
int lineBreak;
while ((lineBreak = Characters.findLineBreak(input, lineStart)) != -1) {
String line = input.substring(lineStart, lineBreak);
parseLine(line);
parseLine(line, lineStart);
if (lineBreak + 1 < input.length() && input.charAt(lineBreak) == '\r' && input.charAt(lineBreak + 1) == '\n') {
lineStart = lineBreak + 2;
} else {
Expand All @@ -136,23 +137,23 @@ public Document parse(String input) {
}
if (!input.isEmpty() && (lineStart == 0 || lineStart < input.length())) {
String line = input.substring(lineStart);
parseLine(line);
parseLine(line, lineStart);
}

return finalizeAndProcess();
}

public Document parse(Reader input) throws IOException {
BufferedReader bufferedReader;
if (input instanceof BufferedReader) {
bufferedReader = (BufferedReader) input;
} else {
bufferedReader = new BufferedReader(input);
}

var lineReader = new LineReader(input);
int inputIndex = 0;
String line;
while ((line = bufferedReader.readLine()) != null) {
parseLine(line);
while ((line = lineReader.readLine()) != null) {
parseLine(line, inputIndex);
inputIndex += line.length();
var eol = lineReader.getLineTerminator();
if (eol != null) {
inputIndex += eol.length();
}
}

return finalizeAndProcess();
Expand Down Expand Up @@ -197,8 +198,8 @@ public BlockParser getActiveBlockParser() {
* Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
* line of input, then finalizing the document.
*/
private void parseLine(String ln) {
setLine(ln);
private void parseLine(String ln, int inputIndex) {
setLine(ln, inputIndex);

// For each containing block, try to parse the associated line start.
// The document will always match, so we can skip the first block parser and start the match count at 1.
Expand Down Expand Up @@ -322,7 +323,7 @@ private void parseLine(String ln) {
}
}

private void setLine(String ln) {
private void setLine(String ln, int inputIndex) {
lineIndex++;
index = 0;
column = 0;
Expand All @@ -331,7 +332,7 @@ private void setLine(String ln) {
String lineContent = prepareLine(ln);
SourceSpan sourceSpan = null;
if (includeSourceSpans != IncludeSourceSpans.NONE) {
sourceSpan = SourceSpan.of(lineIndex, 0, lineContent.length());
sourceSpan = SourceSpan.of(lineIndex, 0, inputIndex, lineContent.length());
}
this.line = SourceLine.of(lineContent, sourceSpan);
}
Expand Down Expand Up @@ -430,10 +431,9 @@ private void addLine() {
content = line.getContent().subSequence(index, line.getContent().length());
}
SourceSpan sourceSpan = null;
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES) {
// Note that if we're in a partially-consumed tab, the length here corresponds to the content but not to the
// actual source length. That sounds like a problem, but I haven't found a test case where it matters (yet).
sourceSpan = SourceSpan.of(lineIndex, index, content.length());
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES && index < line.getSourceSpan().getLength()) {
// Note that if we're in a partially-consumed tab the length of the source span and the content don't match.
sourceSpan = line.getSourceSpan().subSpan(index);
}
getActiveBlockParser().addLine(SourceLine.of(content, sourceSpan));
addSourceSpans();
Expand All @@ -449,7 +449,7 @@ private void addSourceSpans() {
int blockIndex = Math.min(openBlockParser.sourceIndex, index);
int length = line.getContent().length() - blockIndex;
if (length != 0) {
openBlockParser.blockParser.addSourceSpan(SourceSpan.of(lineIndex, blockIndex, length));
openBlockParser.blockParser.addSourceSpan(line.getSourceSpan().subSpan(blockIndex));
}
}
}
Expand Down
70 changes: 65 additions & 5 deletions commonmark/src/main/java/org/commonmark/node/SourceSpan.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,97 @@ public class SourceSpan {

private final int lineIndex;
private final int columnIndex;
private final int inputIndex;
private final int length;

/**
 * Creates a source span from a line index, a column index, an input index and a length.
 *
 * @param line   0-based line index
 * @param col    0-based column index (character on the line)
 * @param input  0-based index in the whole input
 * @param length length of the span in characters
 * @return the new source span
 * @throws IllegalArgumentException if any of the arguments is negative
 */
public static SourceSpan of(int line, int col, int input, int length) {
    final SourceSpan span = new SourceSpan(line, col, input, length);
    return span;
}

/**
 * Creates a source span without an explicit input index.
 *
 * @deprecated Use {@link #of(int, int, int, int)} instead to also specify input index. Using the deprecated one
 * will set {@link #inputIndex} to 0.
 */
@Deprecated
public static SourceSpan of(int lineIndex, int columnIndex, int length) {
return new SourceSpan(lineIndex, columnIndex, length);
return of(lineIndex, columnIndex, 0, length);
}

// Private constructor; instances are obtained through the static of(...) factories.
// Every argument is validated before the fields are assigned: a negative value is rejected
// with an IllegalArgumentException whose message names the offending argument and its value.
private SourceSpan(int lineIndex, int columnIndex, int length) {
private SourceSpan(int lineIndex, int columnIndex, int inputIndex, int length) {
if (lineIndex < 0) {
throw new IllegalArgumentException("lineIndex " + lineIndex + " must be >= 0");
}
if (columnIndex < 0) {
throw new IllegalArgumentException("columnIndex " + columnIndex + " must be >= 0");
}
if (inputIndex < 0) {
throw new IllegalArgumentException("inputIndex " + inputIndex + " must be >= 0");
}
if (length < 0) {
throw new IllegalArgumentException("length " + length + " must be >= 0");
}
this.lineIndex = lineIndex;
this.columnIndex = columnIndex;
this.inputIndex = inputIndex;
this.length = length;
}

/**
 * Returns the line index of this span.
 *
 * @return 0-based index of line in source
 * @return 0-based line index, e.g. 0 for first line, 1 for the second line, etc
 */
public int getLineIndex() {
return lineIndex;
}

/**
 * Returns the column index of this span.
 *
 * @return 0-based index of column (character on line) in source
 * @return 0-based index of column (character on line) in source, e.g. 0 for the first character of a line, 1 for
 * the second character, etc
 */
public int getColumnIndex() {
return columnIndex;
}

/**
 * Returns the position of this span within the whole input rather than within its line.
 * <p>
 * For example, if the input is {@code "foo\n\nbar"}, the span of the {@code bar} paragraph has input index 5
 * ({@code b} is the character at index 5 of the input string).
 *
 * @return 0-based index in whole input
 * @since 0.24.0
 */
public int getInputIndex() {
    return this.inputIndex;
}

/**
 * Returns how many characters this span covers.
 *
 * @return length of the span in characters
 */
public int getLength() {
    return this.length;
}

/**
 * Returns the part of this span that starts at {@code beginIndex} and runs to the end of this span.
 *
 * @param beginIndex 0-based start offset, relative to the start of this span
 * @return the result of {@link #subSpan(int, int)} with this span's length as the end index
 */
public SourceSpan subSpan(int beginIndex) {
    final int endIndex = this.length;
    return subSpan(beginIndex, endIndex);
}

/**
 * Returns the part of this span between {@code beginIndex} (inclusive) and {@code endIndex} (exclusive).
 * Both indexes are relative to the start of this span. The result stays on the same line; its column index
 * and input index are shifted by {@code beginIndex} and its length is {@code endIndex - beginIndex}.
 *
 * @param beginIndex 0-based start offset within this span, inclusive
 * @param endIndex   0-based end offset within this span, exclusive
 * @return this same instance if the requested range covers the whole span, otherwise a new span
 * @throws IndexOutOfBoundsException if either index is negative or greater than the length of this span,
 *                                   or if {@code beginIndex} is greater than {@code endIndex}
 */
public SourceSpan subSpan(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be >= 0");
    }
    if (beginIndex > length) {
        throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= length " + length);
    }
    if (endIndex < 0) {
        throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be >= 0");
    }
    if (endIndex > length) {
        throw new IndexOutOfBoundsException("endIndex " + endIndex + " must be <= length " + length);
    }
    if (beginIndex > endIndex) {
        throw new IndexOutOfBoundsException("beginIndex " + beginIndex + " must be <= endIndex " + endIndex);
    }
    // The full range is this span itself, so no new object is needed.
    if (beginIndex == 0 && endIndex == length) {
        return this;
    }
    return new SourceSpan(lineIndex, columnIndex + beginIndex, inputIndex + beginIndex, endIndex - beginIndex);
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -71,19 +129,21 @@ public boolean equals(Object o) {
SourceSpan that = (SourceSpan) o;
return lineIndex == that.lineIndex &&
columnIndex == that.columnIndex &&
inputIndex == that.inputIndex &&
length == that.length;
}

// Hash code built with Objects.hash from the span's fields (see the argument list below).
@Override
public int hashCode() {
return Objects.hash(lineIndex, columnIndex, length);
return Objects.hash(lineIndex, columnIndex, inputIndex, length);
}

/**
 * Renders all four fields in the form
 * {@code SourceSpan{line=..., column=..., input=..., length=...}}.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("SourceSpan{");
    text.append("line=").append(lineIndex);
    text.append(", column=").append(columnIndex);
    text.append(", input=").append(inputIndex);
    text.append(", length=").append(length);
    text.append("}");
    return text.toString();
}
Expand Down
Loading

0 comments on commit a6b3daa

Please sign in to comment.