diff --git a/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java b/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java new file mode 100644 index 000000000..b44098257 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/util/LineReader.java @@ -0,0 +1,149 @@ +package org.commonmark.internal.util; + +import java.io.Closeable; +import java.io.IOException; +import java.io.Reader; + +/** + * Reads lines from a reader like {@link java.io.BufferedReader} but also returns the line terminators. + *
+ * Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed + * by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the + * corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()} + * returns {@code null}. + */ +public class LineReader implements Closeable { + + // Same as java.io.BufferedReader + static final int CHAR_BUFFER_SIZE = 8192; + static final int EXPECTED_LINE_LENGTH = 80; + + private Reader reader; + private char[] cbuf; + + private int position = 0; + private int limit = 0; + + private String lineTerminator = null; + + public LineReader(Reader reader) { + this.reader = reader; + this.cbuf = new char[CHAR_BUFFER_SIZE]; + } + + /** + * Read a line of text. + * + * @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read + */ + public String readLine() throws IOException { + StringBuilder sb = null; + boolean cr = false; + + while (true) { + if (position >= limit) { + fill(); + } + + if (cr) { + // We saw a CR before, check if we have CR LF or just CR. + if (position < limit && cbuf[position] == '\n') { + position++; + return line(sb.toString(), "\r\n"); + } else { + return line(sb.toString(), "\r"); + } + } + + if (position >= limit) { + // End of stream, return either the last line without terminator or null for end. + return line(sb != null ? sb.toString() : null, null); + } + + int start = position; + int i = position; + for (; i < limit; i++) { + char c = cbuf[i]; + if (c == '\n') { + position = i + 1; + return line(finish(sb, start, i), "\n"); + } else if (c == '\r') { + if (i + 1 < limit) { + // We know what the next character is, so we can check now whether we have + // a CR LF or just a CR and return. + if (cbuf[i + 1] == '\n') { + position = i + 2; + return line(finish(sb, start, i), "\r\n"); + } else { + position = i + 1; + return line(finish(sb, start, i), "\r"); + } + } else { + // We don't know what the next character is yet, check on next iteration. + cr = true; + position = i + 1; + break; + } + } + } + + if (position < i) { + position = i; + } + + // Haven't found a finished line yet, copy the data from the buffer so that we can fill + // the buffer again. + if (sb == null) { + sb = new StringBuilder(EXPECTED_LINE_LENGTH); + } + sb.append(cbuf, start, i - start); + } + } + + /** + * Return the line terminator of the last read line from {@link #readLine()}. + * + * @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null} + */ + public String getLineTerminator() { + return lineTerminator; + } + + @Override + public void close() throws IOException { + if (reader == null) { + return; + } + try { + reader.close(); + } finally { + reader = null; + cbuf = null; + } + } + + private void fill() throws IOException { + int read; + do { + read = reader.read(cbuf, 0, cbuf.length); + } while (read == 0); + if (read > 0) { + limit = read; + position = 0; + } + } + + private String line(String line, String lineTerminator) { + this.lineTerminator = lineTerminator; + return line; + } + + private String finish(StringBuilder sb, int start, int end) { + int len = end - start; + if (sb == null) { + return new String(cbuf, start, len); + } else { + return sb.append(cbuf, start, len).toString(); + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java b/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java new file mode 100644 index 000000000..fc48623a8 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java @@ -0,0 +1,128 @@ +package org.commonmark.internal.util; + +import org.junit.Test; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Objects; + +import static java.util.stream.Collectors.joining; +import static org.commonmark.internal.util.LineReader.CHAR_BUFFER_SIZE; +import static org.junit.Assert.*; + +public class LineReaderTest { + + @Test + public void testReadLine() throws IOException { + assertLines(); + + assertLines("", "\n"); + assertLines("foo", "\n", "bar", "\n"); + assertLines("foo", "\n", "bar", null); + assertLines("", "\n", "", "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\n"); + + assertLines("", "\r\n"); + assertLines("foo", "\r\n", "bar", "\r\n"); + assertLines("foo", "\r\n", "bar", null); + assertLines("", "\r\n", "", "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 2), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r\n"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r\n"); + + assertLines("", "\r"); + assertLines("foo", "\r", "bar", "\r"); + assertLines("foo", "\r", "bar", null); + assertLines("", "\r", "", "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r"); + assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r"); + + assertLines("", "\n", "", "\r", "", "\r\n", "", "\n"); + assertLines("what", "\r", "are", "\r", "", "\r", "you", "\r\n", "", "\r\n", "even", "\n", "doing", null); + } + + @Test + public void testClose() throws IOException { + var reader = new InputStreamReader(new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8))); + var lineReader = new LineReader(reader); + lineReader.close(); + lineReader.close(); + try { + reader.read(); + fail("Expected read to throw after closing reader"); + } catch (IOException e) { + // Expected + } + } + + private void assertLines(String... s) throws IOException { + assertTrue("Expected parts needs to be even (pairs of content and terminator)", s.length % 2 == 0); + var input = Arrays.stream(s).filter(Objects::nonNull).collect(joining("")); + + assertLines(new StringReader(input), s); + assertLines(new SlowStringReader(input), s); + } + + private static void assertLines(Reader reader, String... expectedParts) throws IOException { + try (var lineReader = new LineReader(reader)) { + var lines = new ArrayList<>(); + String line; + while ((line = lineReader.readLine()) != null) { + lines.add(line); + lines.add(lineReader.getLineTerminator()); + } + assertNull(lineReader.getLineTerminator()); + assertEquals(Arrays.asList(expectedParts), lines); + } + } + + private static String repeat(String s, int count) { + StringBuilder sb = new StringBuilder(s.length() * count); + for (int i = 0; i < count; i++) { + sb.append(s); + } + return sb.toString(); + } + + /** + * Reader that only reads 0 or 1 chars at a time to test the corner cases. + */ + private static class SlowStringReader extends Reader { + + private final String s; + private int position = 0; + private boolean empty = false; + + private SlowStringReader(String s) { + this.s = s; + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + Objects.checkFromIndexSize(off, len, cbuf.length); + if (len == 0) { + return 0; + } + empty = !empty; + if (empty) { + // Return 0 every other time to test handling of 0. + return 0; + } + if (position >= s.length()) { + return -1; + } + cbuf[off] = s.charAt(position++); + return 1; + } + + @Override + public void close() throws IOException { + } + } +}