From 43072187392ff66a39c36472a74b197086c015da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20L=C3=B6tters?= <bjoern.loetters@mni.thm.de>
Date: Thu, 30 Jan 2025 17:06:04 +0100
Subject: [PATCH] Add more JavaDoc documentation and fix a bug where the
 computation of a column number led to an exception

---
 core/src/main/java/jcombinators/Parsing.java  | 22 ++++--
 .../jcombinators/description/Description.java | 73 +++++++++++++++++++
 .../jcombinators/input/CharacterInput.java    | 51 +++++++++++--
 .../main/java/jcombinators/input/Input.java   | 15 ++--
 .../java/jcombinators/CombinatorTest.java     |  9 +--
 5 files changed, 144 insertions(+), 26 deletions(-)
diff --git a/core/src/main/java/jcombinators/Parsing.java b/core/src/main/java/jcombinators/Parsing.java
index 25e7f23..38798a2 100644
--- a/core/src/main/java/jcombinators/Parsing.java
+++ b/core/src/main/java/jcombinators/Parsing.java
@@ -312,17 +312,16 @@ public boolean isFatal() {
     /**
      * The abstract base class of a {@link Parser}.
      * <br/><br/>
-     * Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an
-     * {@link Input} and produces a {@link Result}. That is to say, in order to implement a {@link Parser} it
-     * suffices to create an anonymous class that extends this class, implementing the {@link Parser#apply}
-     * method.
-     *
-     * @implNote Unfortunately, Java does not support instance interfaces which is why this class is not a
+     * Very similar to recursive descent parsing, a {@link Parser} is just a {@link Function} that takes an {@link Input}
+     * and produces a {@link Result}. In order to implement a {@link Parser} it therefore suffices to implement the
+     * {@link Parser#apply} method.
+     * <br/><br/>
+     * <b>Implementation Note</b>: Unfortunately, Java does not support instance interfaces which is why this class is not a
      * {@link FunctionalInterface} and we cannot use the lambda syntax to implement a {@link Parser}.
      *
      * @author Björn Lötters
      *
-     * @param <T> The type of the value that is the result of running this parser.
+     * @param <T> The type of the value that is the result of running this {@link Parser}.
      */
     public abstract class Parser<T> implements Function<Input<I>, Result<T>> {
 
@@ -334,6 +333,15 @@ public abstract class Parser<T> implements Function<Input<I>, Result<T>> {
         @Override
         public abstract Result<T> apply(final Input<I> input);
 
+        /**
+         * This method shall return a {@link Description} for this {@link Parser} which provides details about the shape
+         * of the {@link Input} this {@link Parser} expects. By default, this method returns an {@link Empty} {@link Description}
+         * and must hence be overridden if required.
+         *
+         * @return A {@link Description} of this {@link Parser}.
+         *
+         * @see Description
+         */
         public Description description() {
             return new Empty();
         }
diff --git a/core/src/main/java/jcombinators/description/Description.java b/core/src/main/java/jcombinators/description/Description.java
index 4b7fc7f..42e9e9c 100644
--- a/core/src/main/java/jcombinators/description/Description.java
+++ b/core/src/main/java/jcombinators/description/Description.java
@@ -6,14 +6,61 @@
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
+import jcombinators.Parsing.Parser;
+import jcombinators.Parsing.Failure;
+
+/**
+ * The base class for all kinds of {@link Parser} {@link Description}s.
+ * <br/><br/>
+ * A {@link Description} describes the expectation of a {@link Parser} and can be used to generate more informative
+ * error messages in case of a {@link Failure}. The following kinds of {@link Description}s are supported:
+ * <ul>
+ *     <li>{@link Literal}: A {@link Description} describing that a specific {@link String} literal was expected.</li>
+ *     <li>{@link RegExp}: A {@link Description} describing that a specific {@link Pattern} was expected.</li>
+ *     <li>{@link Choice}: A {@link Description} that represents a choice between other {@link Description}s.</li>
+ *     <li>{@link Sequence}: A {@link Description} that represents a sequence of other {@link Description}s.</li>
+ *     <li>{@link Negation}: A {@link Description} that negates another {@link Description}.</li>
+ *     <li>{@link Empty}: An empty {@link Description} that does not provide further information.</li>
+ * </ul>
+ *
+ * @see Choice
+ * @see Literal
+ * @see Negation
+ * @see RegExp
+ * @see Sequence
+ * @see Empty
+ * @see Parser
+ *
+ * @author Björn Lötters
+ */
 public sealed abstract class Description permits Choice, Literal, Negation, RegExp, Sequence, Empty {
 
+    /**
+     * Produces a {@link String} that explains what a corresponding {@link Parser} would expect according to this
+     * {@link Description}.
+     * @return An {@link Optional} {@link String} where {@link Optional#empty()} is returned in case this
+     *         {@link Description} is {@link Empty}.
+     */
     public abstract Optional<String> describe();
 
+    /**
+     * Normalizes this {@link Description} such that {@link Negation}s are propagated downwards to the individual
+     * {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are
+     * merged into a single {@link RegExp}.
+     * @return The normalized {@link Description}.
+     */
     public final Description normalize() {
         return normalize(this, false);
     }
 
+    /**
+     * Normalizes the provided {@link Description} such that {@link Negation}s are propagated downwards to the individual
+     * {@link Literal} and {@link RegExp} {@link Description}s. Moreover, {@link Choice}s of {@link RegExp}s are
+     * merged into a single {@link RegExp}.
+     * @param description The {@link Description} that shall be normalized.
+     * @param negate Whether the provided {@link Description} should be negated or not.
+     * @return The normalized {@link Description}.
+     */
     private static Description normalize(final Description description, final boolean negate) {
         switch (description) {
             case Choice choice: {
@@ -55,6 +102,12 @@ private static Description normalize(final Description description, final boolea
         }
     }
 
+    /**
+     * Collects nested {@link Choice}s into a single {@link List} of {@link Description}s.
+     * @param choice The {@link Choice} whose {@link Description}s should be collected.
+     * @param alternatives The {@link List} to which the individual {@link Description}s should be added.
+     * @return The provided {@link List}.
+     */
     private static List<Description> collect(final Choice choice, final List<Description> alternatives) {
         for (final Description alternative : choice.alternatives) {
             if (alternative instanceof Choice other) {
@@ -67,6 +120,12 @@ private static List<Description> collect(final Choice choice, final List<Descrip
         return alternatives;
     }
 
+    /**
+     * Collects nested {@link Sequence}s into a single {@link List} of {@link Description}s.
+     * @param sequence The {@link Sequence} whose {@link Description}s should be collected.
+     * @param elements The {@link List} to which the individual {@link Description}s should be added.
+     * @return The provided {@link List}.
+     */
     private static List<Description> collect(final Sequence sequence, final List<Description> elements) {
         for (final Description element : sequence.elements) {
             if (element instanceof Sequence other) {
@@ -79,14 +138,28 @@ private static List<Description> collect(final Sequence sequence, final List<Des
         return elements;
     }
 
+    /**
+     * Creates a new {@link Description} by creating a {@link Sequence} from {@code this} and the provided {@link Description}.
+     * @param description The second {@link Description}.
+     * @return A {@link Sequence} {@link Description} containing {@code this} and the provided {@link Description} in this order.
+     */
     public final Description and(final Description description) {
         return new Sequence(List.of(this, description));
     }
 
+    /**
+     * Creates a new {@link Description} by creating a {@link Choice} from {@code this} and the provided {@link Description}.
+     * @param alternative The alternative {@link Description}.
+     * @return A {@link Choice} {@link Description} containing {@code this} and the provided {@link Description} in this order.
+     */
     public final Description or(final Description alternative) {
         return new Choice(List.of(this, alternative));
     }
 
+    /**
+     * Creates a new {@link Description} by creating a {@link Negation} from {@code this} {@link Description}.
+     * @return A {@link Negation} of {@code this} {@link Description}.
+     */
     public final Description negate() {
         return new Negation(this);
     }
diff --git a/core/src/main/java/jcombinators/input/CharacterInput.java b/core/src/main/java/jcombinators/input/CharacterInput.java
index d220a41..d593e7a 100644
--- a/core/src/main/java/jcombinators/input/CharacterInput.java
+++ b/core/src/main/java/jcombinators/input/CharacterInput.java
@@ -1,6 +1,10 @@
 package jcombinators.input;
 
+import java.util.Arrays;
+import java.util.List;
 import java.util.NoSuchElementException;
+import java.util.Objects;
+import java.util.stream.Collectors;
 
 /**
  * A specific implementation of an {@link Input} for {@link Character}s that also implements the {@link CharSequence}
@@ -25,6 +29,15 @@ public final class CharacterInput extends Input<Character> implements CharSequen
 
     private final int length;
 
+    /**
+     * Constructs a new {@link CharacterInput}.
+     * @param name A human-readable name for this {@link CharacterInput}.
+     * @param sequence The underlying {@link CharSequence} for this {@link CharacterInput}.
+     * @param offset An offset in characters that denotes the start of a subsequence in the provided {@link CharSequence}.
+     * @param length The length of the subsequence that is denoted by the offset.
+     * @param lines A cache of line offsets, which is used to compute the line and column numbers in Unicode code points
+     *              on the basis of the character offset.
+     */
     CharacterInput(final String name, final CharSequence sequence, final int offset, final int length, final int[] lines) {
         super(name);
         this.sequence = sequence;
@@ -88,10 +101,11 @@ public String toString() {
     }
 
     /**
-     * TODO:
-     *  1. Add more JavaDoc documentation
-     *  2. Implement RegExpParser for arbitrary Input<Character> and not only CharacterInputs
-     *  3. Implement a "skip" method that skips a prefix only (filter is probably too progressive)
+     * Represents a {@link Position} in this {@link CharacterInput} that is aware of the underlying Unicode code points.
+     *
+     * @see Position
+     * @see Character
+     *
      * @author Björn Lötters
      */
     public final class CodePointPosition extends Position {
@@ -101,14 +115,24 @@ public final class CodePointPosition extends Position {
          * @param offset The offset of this {@link CodePointPosition}. An offset is the number of characters that must
          *               be skipped in the underlying {@link CharacterInput} in order to reach this {@link CodePointPosition}.
          */
-        public CodePointPosition(final int offset) {
+        private CodePointPosition(final int offset) {
             super(offset);
         }
 
+        /**
+         * Returns the code point that occurs in the associated {@link CharacterInput} at this {@link CodePointPosition}.
+         * @return The Unicode code point at this {@link CodePointPosition}.
+         */
         public int getCodePoint() {
             return Character.codePointAt(sequence, offset);
         }
 
+        /**
+         * Computes the line number that corresponds to this {@link CodePointPosition} as it is perceived by the user.
+         * That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the
+         * Unicode code point offset.
+         * @return The line number of this {@link CodePointPosition}.
+         */
         public int getLineNumber() {
             // Here, we do a binary search to find the index of the line number that corresponds to the offset of this position.
             int lower = 0;
@@ -131,20 +155,31 @@ public int getLineNumber() {
             return lower + 1;
         }
 
+        /**
+         * Computes the column number that corresponds to this {@link CodePointPosition} as it is perceived by the user.
+         * That is to say, this is not necessarily the character offset in this {@link CodePointPosition} but the
+         * Unicode code point offset relative to the corresponding line offset.
+         * @return The column number of this {@link CodePointPosition}.
+         */
         public int getColumnNumber() {
-            final int lineOffset = lines[getLineNumber() - 1];
+            final int lineNumber = getLineNumber() - 1;
+            final int lineOffset = lineNumber < lines.length ? lines[lineNumber] : 0;
             // We add 1 here, since column numbers usually start at 1 and not 0.
             return offset - lineOffset + 1;
         }
 
         @Override
         public String describe() {
-            return String.format("character '%s'", Character.toString(getCodePoint()));
+            if (offset >= CharacterInput.this.sequence.length()) {
+                return "end of input";
+            } else {
+                return String.format("character '%s'", Character.toString(getCodePoint()));
+            }
         }
 
         @Override
         public String toString() {
-            return String.format("position %d:%d", getLineNumber(), getColumnNumber());
+            return String.format("%s at line %d and column %d", name, getLineNumber(), getColumnNumber());
         }
 
     }
diff --git a/core/src/main/java/jcombinators/input/Input.java b/core/src/main/java/jcombinators/input/Input.java
index dd57df5..fbf173c 100644
--- a/core/src/main/java/jcombinators/input/Input.java
+++ b/core/src/main/java/jcombinators/input/Input.java
@@ -1,22 +1,23 @@
 package jcombinators.input;
 
+import jcombinators.Parsing.Parser;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.sql.Array;
-import java.util.*;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.function.Predicate;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
-import jcombinators.Parsing.Parser;
-
 /**
- * An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharInputOld}, which is
- * an {@link Input} of {@link Character} and can be considered a sequence of characters (just like a {@link String}).
+ * An abstract representation of an {@link Input} for parsing. In most cases, this will be a {@link CharacterInput}, which
+ * is an {@link Input} of {@link Character}s and can be considered a sequence of characters (just like a {@link String}).
  * @param <T> The element type for this {@link Input}. In most cases this type equals {@link Character}.
  *
  * @see Parser
@@ -196,6 +197,7 @@ public static <T> Input<T> of(final String name, final Stream<T> stream) {
      * @param stream The underlying source for this {@link Input}.
      * @param charset The {@link Charset} that shall be used to decode the {@link InputStream}.
      * @return An {@link Input} of {@link Character}s that is based on the underlying {@link InputStream}.
+     * @throws IOException If reading from the {@link InputStream} fails.
      */
     public static Input<Character> of(final String name, final InputStream stream, final Charset charset) throws IOException {
         final byte[] bytes = stream.readAllBytes();
@@ -208,6 +210,7 @@ public static Input<Character> of(final String name, final InputStream stream, f
      *             used as the name for the {@link Input}.
      * @param charset The {@link Charset} that shall be used to decode the file denoted by the provided {@link Path}.
      * @return An {@link Input} of {@link Character}s that is based on the underlying file contents.
+     * @throws IOException If reading the file contents denoted by the {@link Path} fails.
      */
     public static Input<Character> of(final Path path, final Charset charset) throws IOException {
         final byte[] bytes = Files.readAllBytes(path);
diff --git a/core/src/test/java/jcombinators/CombinatorTest.java b/core/src/test/java/jcombinators/CombinatorTest.java
index 4c259b9..2eb66dc 100644
--- a/core/src/test/java/jcombinators/CombinatorTest.java
+++ b/core/src/test/java/jcombinators/CombinatorTest.java
@@ -2,7 +2,6 @@
 
 import jcombinators.data.Product;
 import jcombinators.input.Input;
-import jcombinators.position.Position;
 import org.junit.Test;
 
 import java.util.List;
@@ -226,21 +225,21 @@ public void positionParserCorrectPositionTest() {
         final String contents = "line1\nline2\nline3\n";
         final Input<Character> input = Input.of("test", contents);
 
-        final Parser<Function<Position, String>> parser = regex("line[0-9]\n").map(ignore -> position -> position.line + ":" + position.column);
+        final Parser<Function<Input<Character>.Position, String>> parser = regex("line[0-9]\n").map(ignore -> position -> position.toString());
 
         final Parser<String> positionParser = position(parser);
         final Result<String> firstResult = positionParser.apply(input);
 
         assertTrue(firstResult.isSuccess());
-        assertEquals("1:1", firstResult.get().get());
+        assertEquals("position 1:1", firstResult.get().get());
 
         final Result<String> secondResult = positionParser.apply(firstResult.rest);
         assertTrue(secondResult.isSuccess());
-        assertEquals("2:1", secondResult.get().get());
+        assertEquals("position 2:1", secondResult.get().get());
 
         final Result<String> thirdResult = positionParser.apply(secondResult.rest);
         assertTrue(thirdResult.isSuccess());
-        assertEquals("3:1", thirdResult.get().get());
+        assertEquals("position 3:1", thirdResult.get().get());
     }
 
 }