Skip to content

Commit

Permalink
Add more JavaDoc documentation and fix a bug where the computation of…
Browse files Browse the repository at this point in the history
… a column number led to an exception
  • Loading branch information
BjoernLoetters committed Jan 30, 2025
1 parent 278e799 commit 4307218
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 26 deletions.
22 changes: 15 additions & 7 deletions core/src/main/java/jcombinators/Parsing.java
Original file line number Diff line number Diff line change
Expand Up @@ -312,17 +312,16 @@ public boolean isFatal() {
/**
* The abstract base class of a {@link Parser}.
* <br/><br/>
* Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an
* {@link Input} and produces a {@link Result}. That is to say, in order to implement a {@link Parser} it
* suffices to create an anonymous class that extends this class, implementing the {@link Parser#apply}
* method.
*
* @implNote Unfortunately, Java does not support instance interfaces which is why this class is not a
* Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an {@link Input}
* and produces a {@link Result}. In order to implement a {@link Parser} it therefore suffices to implement the
* {@link Parser#apply} method.
* <br/><br/>
* <b>Implementation Note</b>: Unfortunately, Java does not support instance interfaces which is why this class is not a
* {@link FunctionalInterface} and we cannot use the lambda syntax to implement a {@link Parser}.
*
* @author Björn Lötters
*
* @param <T> The type of the value that is the result of running this parser.
* @param <T> The type of the value that is the result of running this {@link Parser}.
*/
public abstract class Parser<T> implements Function<Input<I>, Result<T>> {

Expand All @@ -334,6 +333,15 @@ public abstract class Parser<T> implements Function<Input<I>, Result<T>> {
@Override
public abstract Result<T> apply(final Input<I> input);

/**
* Returns a {@link Description} for this {@link Parser} which provides details about the shape of the
* {@link Input} this {@link Parser} expects. By default, this method returns an {@link Empty} {@link Description};
* subclasses that can describe their expected {@link Input} should override it.
*
* @return A {@link Description} of this {@link Parser}.
*
* @see Description
*/
public Description description() {
return new Empty();
}
Expand Down
73 changes: 73 additions & 0 deletions core/src/main/java/jcombinators/description/Description.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,61 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import jcombinators.Parsing.Parser;
import jcombinators.Parsing.Failure;

/**
* The base class for all kinds of {@link Parser} {@link Description}s.
* <br/><br/>
* A {@link Description} describes the expectation of a {@link Parser} and can be used to generate more informative
* error messages in case of a {@link Failure}. The following kinds of {@link Description}s are supported:
* <ul>
* <li>{@link Literal}: A {@link Description} describing that a specific {@link String} literal was expected.</li>
* <li>{@link RegExp}: A {@link Description} describing that a specific {@link Pattern} was expected.</li>
* <li>{@link Choice}: A {@link Description} that represents a choice between other {@link Description}s.</li>
* <li>{@link Sequence}: A {@link Description} that represents a sequence of other {@link Description}s.</li>
* <li>{@link Negation}: A {@link Description} that negates another {@link Description}.</li>
* <li>{@link Empty}: An empty {@link Description} that does not provide further information.</li>
* </ul>
*
* @see Choice
* @see Literal
* @see Negation
* @see RegExp
* @see Sequence
* @see Empty
* @see Parser
*
* @author Björn Lötters
*/
public sealed abstract class Description permits Choice, Literal, Negation, RegExp, Sequence, Empty {

/**
* Produces a {@link String} that explains what a corresponding {@link Parser} would expect according to this
* {@link Description}.
* @return An {@link Optional} {@link String} where {@link Optional#empty()} is returned in case this
* {@link Description} is {@link Empty}.
*/
public abstract Optional<String> describe();

/**
* Normalizes this {@link Description} such that {@link Negation}s are propagated downwards to the individual
* {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are
* combined into a single {@link RegExp}. Normalization starts from {@code this} without an initial negation.
* @return The normalized {@link Description}.
*/
public final Description normalize() {
return normalize(this, false);
}

/**
* Normalizes the provided {@link Description} such that {@link Negation}s are propagated downwards to the individual
* {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are
* combined into a single {@link RegExp}.
* @param description The {@link Description} that shall be normalized.
* @param negate Whether the provided {@link Description} should be negated or not.
* @return The normalized {@link Description}.
*/
private static Description normalize(final Description description, final boolean negate) {
switch (description) {
case Choice choice: {
Expand Down Expand Up @@ -55,6 +102,12 @@ private static Description normalize(final Description description, final boolea
}
}

/**
* Collects nested {@link Choice}s into a single {@link List} of {@link Description}s.
* @param choice The {@link Choice} whose {@link Description}s should be collected.
* @param alternatives The {@link List} to which the individual {@link Description}s should be added.
* @return The provided {@link List}.
*/
private static List<Description> collect(final Choice choice, final List<Description> alternatives) {
for (final Description alternative : choice.alternatives) {
if (alternative instanceof Choice other) {
Expand All @@ -67,6 +120,12 @@ private static List<Description> collect(final Choice choice, final List<Descrip
return alternatives;
}

/**
* Collects nested {@link Sequence}s into a single {@link List} of {@link Description}s.
* @param sequence The {@link Sequence} whose {@link Description}s should be collected.
* @param elements The {@link List} to which the individual {@link Description}s should be added.
* @return The provided {@link List}.
*/
private static List<Description> collect(final Sequence sequence, final List<Description> elements) {
for (final Description element : sequence.elements) {
if (element instanceof Sequence other) {
Expand All @@ -79,14 +138,28 @@ private static List<Description> collect(final Sequence sequence, final List<Des
return elements;
}

/**
* Creates a new {@link Description} by creating a {@link Sequence} from {@code this} and the provided {@link Description}.
* @param description The second {@link Description}; must not be {@code null}.
* @return A {@link Sequence} {@link Description} containing {@code this} and the provided {@link Description} in this order.
* @throws NullPointerException If the provided {@link Description} is {@code null}, since
* {@link List#of(Object, Object)} rejects {@code null} elements.
*/
public final Description and(final Description description) {
return new Sequence(List.of(this, description));
}

/**
* Creates a new {@link Description} by creating a {@link Choice} from {@code this} and the provided {@link Description}.
* @param alternative The alternative {@link Description}; must not be {@code null}.
* @return A {@link Choice} {@link Description} containing {@code this} and the provided {@link Description} in this order.
* @throws NullPointerException If the provided {@link Description} is {@code null}, since
* {@link List#of(Object, Object)} rejects {@code null} elements.
*/
public final Description or(final Description alternative) {
return new Choice(List.of(this, alternative));
}

/**
* Creates a new {@link Description} by creating a {@link Negation} from {@code this} {@link Description}.
* The result merely wraps {@code this}; use {@link #normalize()} to propagate the {@link Negation} downwards.
* @return A {@link Negation} of {@code this} {@link Description}.
*/
public final Description negate() {
return new Negation(this);
}
Expand Down
51 changes: 43 additions & 8 deletions core/src/main/java/jcombinators/input/CharacterInput.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package jcombinators.input;

import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.stream.Collectors;

/**
* A specific implementation of an {@link Input} for {@link Character}s that also implements the {@link CharSequence}
Expand All @@ -25,6 +29,15 @@ public final class CharacterInput extends Input<Character> implements CharSequen

private final int length;

/**
* Constructs a new {@link CharacterInput}.
* @param name A human-readable name for this {@link CharacterInput}.
* @param sequence The underlying {@link CharSequence} for this {@link CharacterInput}.
* @param offset An offset in characters that denotes the start of a subsequence in the provided {@link CharSequence}.
* @param length The length of the subsequence that is denoted by the offset.
* @param lines A cache of line offsets, which is used to compute the line and column numbers in Unicode code points
* on the basis of the character offset.
*/
CharacterInput(final String name, final CharSequence sequence, final int offset, final int length, final int[] lines) {
super(name);
this.sequence = sequence;
Expand Down Expand Up @@ -88,10 +101,11 @@ public String toString() {
}

/**
* TODO:
* 1. Add more JavaDoc documentation
* 2. Implement RegExpParser for arbitrary Input<Character> and not only CharacterInputs
* 3. Implement a "skip" method that skips a prefix only (filter is probably too progressive)
* Represents a {@link Position} in this {@link CharacterInput} that is aware of the underlying unicode code points.
*
* @see Position
* @see Character
*
* @author Björn Lötters
*/
public final class CodePointPosition extends Position {
Expand All @@ -101,14 +115,24 @@ public final class CodePointPosition extends Position {
* @param offset The offset of this {@link CodePointPosition}. An offset is the number of characters that must
* be skipped in the underlying {@link CharacterInput} in order to reach this {@link CodePointPosition}.
*/
public CodePointPosition(final int offset) {
private CodePointPosition(final int offset) {
super(offset);
}

/**
* Returns the code point that occurs in the associated {@link CharacterInput} at this {@link CodePointPosition}.
* @return The Unicode code point at this {@link CodePointPosition}.
* @throws IndexOutOfBoundsException If the offset of this {@link CodePointPosition} is not a valid index into the
* underlying {@link CharSequence}, for example when it denotes the end of the input.
*/
public int getCodePoint() {
return Character.codePointAt(sequence, offset);
}

/**
* Computes the line number that corresponds to this {@link CodePointPosition} as it is perceived by the user.
* That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the
* unicode code point offset.
* @return The line number of this {@link CodePointPosition}.
*/
public int getLineNumber() {
// Here, we do a binary search to find the index of the line number that corresponds to the offset of this position.
int lower = 0;
Expand All @@ -131,20 +155,31 @@ public int getLineNumber() {
return lower + 1;
}

/**
* Computes the column number that corresponds to this {@link CodePointPosition} as it is perceived by the user.
* That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the
* unicode code point offset relative to the corresponding line offset.
* @return The column number of this {@link CodePointPosition}.
*/
public int getColumnNumber() {
final int lineOffset = lines[getLineNumber() - 1];
final int lineNumber = getLineNumber() - 1;
final int lineOffset = lineNumber < lines.length ? lines[lineNumber] : 0;
// We add 1 here, since column numbers usually start at 1 and not 0.
return offset - lineOffset + 1;
}

@Override
public String describe() {
return String.format("character '%s'", Character.toString(getCodePoint()));
if (offset >= CharacterInput.this.sequence.length()) {
return "end of input";
} else {
return String.format("character '%s'", Character.toString(getCodePoint()));
}
}

@Override
public String toString() {
return String.format("position %d:%d", getLineNumber(), getColumnNumber());
return String.format("%s at line %d and column %d", name, getLineNumber(), getColumnNumber());
}

}
Expand Down
15 changes: 9 additions & 6 deletions core/src/main/java/jcombinators/input/Input.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
package jcombinators.input;

import jcombinators.Parsing.Parser;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Array;
import java.util.*;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import jcombinators.Parsing.Parser;

/**
* An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharInputOld}, which is
* an {@link Input} of {@link Character} and can be considered a sequence of characters (just like a {@link String}).
* An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharacterInput}, which
* is an {@link Input} of {@link Character}s and can be considered a sequence of characters (just like a {@link String}).
* @param <T> The element type for this {@link Input}. In most cases this type equals {@link Character}.
*
* @see Parser
Expand Down Expand Up @@ -196,6 +197,7 @@ public static <T> Input<T> of(final String name, final Stream<T> stream) {
* @param stream The underlying source for this {@link Input}.
* @param charset The {@link Charset} that shall be used to decode the {@link InputStream}.
* @return An {@link Input} of {@link Character}s that is based on the underlying {@link InputStream}.
* @throws IOException If reading from the {@link InputStream} fails.
*/
public static Input<Character> of(final String name, final InputStream stream, final Charset charset) throws IOException {
final byte[] bytes = stream.readAllBytes();
Expand All @@ -208,6 +210,7 @@ public static Input<Character> of(final String name, final InputStream stream, f
* used as the name for the {@link Input}.
* @param charset The {@link Charset} that shall be used to decode the file denoted by the provided {@link Path}.
* @return An {@link Input} of {@link Character}s that is based on the underlying file contents.
* @throws IOException If reading the file contents denoted by the {@link Path} fails.
*/
public static Input<Character> of(final Path path, final Charset charset) throws IOException {
final byte[] bytes = Files.readAllBytes(path);
Expand Down
9 changes: 4 additions & 5 deletions core/src/test/java/jcombinators/CombinatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import jcombinators.data.Product;
import jcombinators.input.Input;
import jcombinators.position.Position;
import org.junit.Test;

import java.util.List;
Expand Down Expand Up @@ -226,21 +225,21 @@ public void positionParserCorrectPositionTest() {
final String contents = "line1\nline2\nline3\n";
final Input<Character> input = Input.of("test", contents);

final Parser<Function<Position, String>> parser = regex("line[0-9]\n").map(ignore -> position -> position.line + ":" + position.column);
final Parser<Function<Input<Character>.Position, String>> parser = regex("line[0-9]\n").map(ignore -> position -> position.toString());

final Parser<String> positionParser = position(parser);
final Result<String> firstResult = positionParser.apply(input);

assertTrue(firstResult.isSuccess());
assertEquals("1:1", firstResult.get().get());
assertEquals("position 1:1", firstResult.get().get());

final Result<String> secondResult = positionParser.apply(firstResult.rest);
assertTrue(secondResult.isSuccess());
assertEquals("2:1", secondResult.get().get());
assertEquals("position 2:1", secondResult.get().get());

final Result<String> thirdResult = positionParser.apply(secondResult.rest);
assertTrue(thirdResult.isSuccess());
assertEquals("3:1", thirdResult.get().get());
assertEquals("position 3:1", thirdResult.get().get());
}

}

0 comments on commit 4307218

Please sign in to comment.