From 43072187392ff66a39c36472a74b197086c015da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20L=C3=B6tters?= Date: Thu, 30 Jan 2025 17:06:04 +0100 Subject: [PATCH] Add more JavaDoc documentation and fix a bug where the computation of a column number led to an exception --- core/src/main/java/jcombinators/Parsing.java | 22 ++++-- .../jcombinators/description/Description.java | 73 +++++++++++++++++++ .../jcombinators/input/CharacterInput.java | 51 +++++++++++-- .../main/java/jcombinators/input/Input.java | 15 ++-- .../java/jcombinators/CombinatorTest.java | 9 +-- 5 files changed, 144 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/jcombinators/Parsing.java b/core/src/main/java/jcombinators/Parsing.java index 25e7f23..38798a2 100644 --- a/core/src/main/java/jcombinators/Parsing.java +++ b/core/src/main/java/jcombinators/Parsing.java @@ -312,17 +312,16 @@ public boolean isFatal() { /** * The abstract base class of a {@link Parser}. *

- * Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an - * {@link Input} and produces a {@link Result}. That is to say, in order to implement a {@link Parser} it - * suffices to create an anonymous class that extends this class, implementing the {@link Parser#apply} - * method. - * - * @implNote Unfortunately, Java does not support instance interfaces which is why this class is not a + * Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an {@link Input} + * and produces a {@link Result}. In order to implement a {@link Parser} it therefore suffices to implement the + * {@link Parser#apply} method. + *

+ * Implementation Note: Unfortunately, Java does not support instance interfaces which is why this class is not a * {@link FunctionalInterface} and we cannot use the lambda syntax to implement a {@link Parser}. * * @author Björn Lötters * - * @param The type of the value that is the result of running this parser. + * @param The type of the value that is the result of running this {@link Parser}. */ public abstract class Parser implements Function, Result> { @@ -334,6 +333,15 @@ public abstract class Parser implements Function, Result> { @Override public abstract Result apply(final Input input); + /** + * This method shall return a {@link Description} for this {@link Parser} which provides details about the shape + * of the {@link Input} this {@link Parser} expects. By default, this method returns an {@link Empty} {@link Description} + * and must hence be overwritten if required. + * + * @return A {@link Description} of this {@link Parser}. + * + * @see Description + */ public Description description() { return new Empty(); } diff --git a/core/src/main/java/jcombinators/description/Description.java b/core/src/main/java/jcombinators/description/Description.java index 4b7fc7f..42e9e9c 100644 --- a/core/src/main/java/jcombinators/description/Description.java +++ b/core/src/main/java/jcombinators/description/Description.java @@ -6,14 +6,61 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; +import jcombinators.Parsing.Parser; +import jcombinators.Parsing.Failure; + +/** + * The base class for all kinds of {@link Parser} {@link Description}s. + *

+ * A {@link Description} describes the expectation of a {@link Parser} and can be used to generate more informative + * error messages in case of a {@link Failure}. The following kinds of {@link Description}s are supported: + *
<ul>
+ *     <li>{@link Literal}: A {@link Description} describing that a specific {@link String} literal was expected.</li>
+ *     <li>{@link RegExp}: A {@link Description} describing that a specific {@link Pattern} was expected.</li>
+ *     <li>{@link Choice}: A {@link Description} that represents a choice between other {@link Description}s.</li>
+ *     <li>{@link Sequence}: A {@link Description} that represents a sequence of other {@link Description}s.</li>
+ *     <li>{@link Negation}: A {@link Description} that negates another {@link Description}.</li>
+ *     <li>{@link Empty}: An empty {@link Description} that does not provide further information.</li>
+ * </ul>
+ * + * @see Choice + * @see Literal + * @see Negation + * @see RegExp + * @see Sequence + * @see Empty + * @see Parser + * + * @author Björn Lötters + */ public sealed abstract class Description permits Choice, Literal, Negation, RegExp, Sequence, Empty { + /** + * Produces a {@link String} that explains what a corresponding {@link Parser} would expect according to this + * {@link Description}. + * @return An {@link Optional} {@link String} where {@link Optional#empty()} is returned in case this + * {@link Description} is {@link Empty}. + */ public abstract Optional describe(); + /** + * Normalizes this {@link Description} such that {@link Negation}s are propagated downwards to the individual + * {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are + * summarized into a single {@link RegExp}. + * @return The normalized {@link Description}. + */ public final Description normalize() { return normalize(this, false); } + /** + * Normalizes the provided {@link Description} such that {@link Negation}s are propagated downwards to the individual + * {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are + * summarized into a single {@link RegExp}. + * @param description The {@link Description} that shall be normalized. + * @param negate Whether the provided {@link Description} should be negated or not. + * @return The normalized {@link Description}. + */ private static Description normalize(final Description description, final boolean negate) { switch (description) { case Choice choice: { @@ -55,6 +102,12 @@ private static Description normalize(final Description description, final boolea } } + /** + * Collects nested {@link Choice}s into a single {@link List} of {@link Description}s. + * @param choice The {@link Choice} whose {@link Description}s should be collected. + * @param alternatives The {@link List} to which the individual {@link Description}s should be added. 
+ * @return The provided {@link List}. + */ private static List collect(final Choice choice, final List alternatives) { for (final Description alternative : choice.alternatives) { if (alternative instanceof Choice other) { @@ -67,6 +120,12 @@ private static List collect(final Choice choice, final List collect(final Sequence sequence, final List elements) { for (final Description element : sequence.elements) { if (element instanceof Sequence other) { @@ -79,14 +138,28 @@ private static List collect(final Sequence sequence, final List implements CharSequen private final int length; + /** + * Constructs a new {@link CharacterInput}. + * @param name A human-readable name for this {@link CharacterInput}. + * @param sequence The underlying {@link CharSequence} for this {@link CharacterInput}. + * @param offset An offset in characters that denotes the start of a subsequence in the provided {@link CharSequence}. + * @param length The length of the subsequence that is denoted by the offset. + * @param lines A cache of line offsets, which is used to compute the line and column numbers in unicode code points + * on basis of the character offset. + */ CharacterInput(final String name, final CharSequence sequence, final int offset, final int length, final int[] lines) { super(name); this.sequence = sequence; @@ -88,10 +101,11 @@ public String toString() { } /** - * TODO: - * 1. Add more JavaDoc documentation - * 2. Implement RegExpParser for arbitrary Input and not only CharacterInputs - * 3. Implement a "skip" method that skips a prefix only (filter is probably too progressive) + * Represents a {@link Position} in this {@link CharacterInput} that is aware of the underlying unicode code points. + * + * @see Position + * @see Character + * * @author Björn Lötters */ public final class CodePointPosition extends Position { @@ -101,14 +115,24 @@ public final class CodePointPosition extends Position { * @param offset The offset of this {@link CodePointPosition}. 
An offset is the number of characters that must * be skipped in the underlying {@link CharacterInput} in order to reach this {@link CodePointPosition}. */ - public CodePointPosition(final int offset) { + private CodePointPosition(final int offset) { super(offset); } + /** + * Returns the code point that occurs in the associated {@link CharacterInput} at this {@link CodePointPosition}. + * @return The unicode code point at this {@link CodePointPosition}. + */ public int getCodePoint() { return Character.codePointAt(sequence, offset); } + /** + * Computes the line number that corresponds to this {@link CodePointPosition} as it is perceived by the user. + * That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the + * unicode code point offset. + * @return The line number of this {@link CodePointPosition}. + */ public int getLineNumber() { // Here, we do a binary search to find the index of the line number that corresponds to the offset of this position. int lower = 0; @@ -131,20 +155,31 @@ public int getLineNumber() { return lower + 1; } + /** + * Computes the column number that corresponds to this {@link CodePointPosition} as it is perceived by the user. + * That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the + * unicode code point offset relative to the corresponding line offset. + * @return The column number of this {@link CodePointPosition}. + */ public int getColumnNumber() { - final int lineOffset = lines[getLineNumber() - 1]; + final int lineNumber = getLineNumber() - 1; + final int lineOffset = lineNumber < lines.length ? lines[lineNumber] : 0; // We add 1 here, since column numbers usually start at 1 and not 0. 
return offset - lineOffset + 1; } @Override public String describe() { - return String.format("character '%s'", Character.toString(getCodePoint())); + if (offset >= CharacterInput.this.sequence.length()) { + return "end of input"; + } else { + return String.format("character '%s'", Character.toString(getCodePoint())); + } } @Override public String toString() { - return String.format("position %d:%d", getLineNumber(), getColumnNumber()); + return String.format("%s at line %d and column %d", name, getLineNumber(), getColumnNumber()); } } diff --git a/core/src/main/java/jcombinators/input/Input.java b/core/src/main/java/jcombinators/input/Input.java index dd57df5..fbf173c 100644 --- a/core/src/main/java/jcombinators/input/Input.java +++ b/core/src/main/java/jcombinators/input/Input.java @@ -1,22 +1,23 @@ package jcombinators.input; +import jcombinators.Parsing.Parser; + import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; -import java.sql.Array; -import java.util.*; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Objects; import java.util.function.Predicate; import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.stream.StreamSupport; -import jcombinators.Parsing.Parser; - /** - * An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharInputOld}, which is - * an {@link Input} of {@link Character} and can be considered a sequence of characters (just like a {@link String}). + * An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharacterInput}, which + * is an {@link Input} of {@link Character}s and can be considered a sequence of characters (just like a {@link String}). * @param The element type for this {@link Input}. In most cases this type equals {@link Character}. 
* * @see Parser @@ -196,6 +197,7 @@ public static Input of(final String name, final Stream stream) { * @param stream The underlying source for this {@link Input}. * @param charset The {@link Charset} that shall be used to decode the {@link InputStream}. * @return An {@link Input} of {@link Character}s that is based on the underlying {@link InputStream}. + * @throws IOException If reading from the {@link InputStream} fails. */ public static Input of(final String name, final InputStream stream, final Charset charset) throws IOException { final byte[] bytes = stream.readAllBytes(); @@ -208,6 +210,7 @@ public static Input of(final String name, final InputStream stream, f * used as the name for the {@link Input}. * @param charset The {@link Charset} that shall be used to decode the file denoted by the provided {@link Path}. * @return An {@link Input} of {@link Character}s that is based on the underlying file contents. + * @throws IOException If reading the file contents denoted by the {@link Path} fails. 
*/ public static Input of(final Path path, final Charset charset) throws IOException { final byte[] bytes = Files.readAllBytes(path); diff --git a/core/src/test/java/jcombinators/CombinatorTest.java b/core/src/test/java/jcombinators/CombinatorTest.java index 4c259b9..2eb66dc 100644 --- a/core/src/test/java/jcombinators/CombinatorTest.java +++ b/core/src/test/java/jcombinators/CombinatorTest.java @@ -2,7 +2,6 @@ import jcombinators.data.Product; import jcombinators.input.Input; -import jcombinators.position.Position; import org.junit.Test; import java.util.List; @@ -226,21 +225,21 @@ public void positionParserCorrectPositionTest() { final String contents = "line1\nline2\nline3\n"; final Input input = Input.of("test", contents); - final Parser> parser = regex("line[0-9]\n").map(ignore -> position -> position.line + ":" + position.column); + final Parser.Position, String>> parser = regex("line[0-9]\n").map(ignore -> position -> position.toString()); final Parser positionParser = position(parser); final Result firstResult = positionParser.apply(input); assertTrue(firstResult.isSuccess()); - assertEquals("1:1", firstResult.get().get()); + assertEquals("position 1:1", firstResult.get().get()); final Result secondResult = positionParser.apply(firstResult.rest); assertTrue(secondResult.isSuccess()); - assertEquals("2:1", secondResult.get().get()); + assertEquals("position 2:1", secondResult.get().get()); final Result thirdResult = positionParser.apply(secondResult.rest); assertTrue(thirdResult.isSuccess()); - assertEquals("3:1", thirdResult.get().get()); + assertEquals("position 3:1", thirdResult.get().get()); } }