From a829bdbc36badb31514ea55de6b295be96c17a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 5 Feb 2025 18:07:34 +0100 Subject: [PATCH] Add extra columns when reading Delimited file (#12231) - Closes #12186 --- CHANGELOG.md | 6 ++ .../src/Delimited/Delimited_Format.enso | 21 ++-- .../0.0.0-dev/src/Delimited/Invalid_Rows.enso | 25 +++++ .../src/Internal/Delimited_Reader.enso | 14 ++- .../java/org/enso/table/data/table/Table.java | 13 +++ .../table/read/DelimitedFileMetadata.java | 4 +- .../org/enso/table/read/DelimitedReader.java | 78 ++++++++++++--- .../src/IO/Delimited_Read_Spec.enso | 95 ++++++++++++++++--- 8 files changed, 219 insertions(+), 37 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Invalid_Rows.enso diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f0acc789e6..b9a7e90003ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,11 +42,17 @@ - [Reducing helper methods in `Standard.Base.Meta`.][12031] - [Added Table.Offset][12071] - [Added Column.Offset][12092] +- [When reading a Delimited file, if a row with more columns than expected is + encountered, extra columns can be added to the result.][12231] + - In `Delimited` format, the `keep_invalid_rows` setting has been renamed to + `on_invalid_rows`. The default behaviour was also changed to add any extra + columns instead of discarding them. [11926]: https://github.com/enso-org/enso/pull/11926 [12031]: https://github.com/enso-org/enso/pull/12031 [12071]: https://github.com/enso-org/enso/pull/12071 [12092]: https://github.com/enso-org/enso/pull/12092 +[12231]: https://github.com/enso-org/enso/pull/12231 #### Enso Language & Runtime diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso index 510f2ddf9f36..b5355c211b31 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso @@ -11,6 +11,7 @@ from Standard.Base.System.File_Format import parse_boolean_with_infer from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector import project.Data_Formatter.Data_Formatter +import project.Delimited.Invalid_Rows.Invalid_Rows import project.Delimited.Quote_Style.Quote_Style import project.Headers.Headers import project.Internal.Delimited_Reader @@ -45,9 +46,8 @@ type Delimited_Format appended to disambiguate them. - value_formatter: Formatter to parse text values into numbers, dates, times, etc. If `Nothing` values are left as Text. - - keep_invalid_rows: Specifies whether rows that contain less or more - columns than expected should be kept (setting the missing columns to - `Nothing` or dropping the excess columns) or dropped. + - on_invalid_rows: Specifies how to handle rows that have less or more + columns than the first row. - line_endings: Sets the line ending style to use. 
Defaults to `Infer` - when reading a file or appending to an existing file, the line endings are detected from file contents; when writing a new file in `Infer` @@ -61,7 +61,7 @@ type Delimited_Format @delimiter make_file_read_delimiter_selector @encoding Encoding.default_widget @row_limit Rows_To_Read.default_widget - Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.default) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Headers=Headers.Detect_Headers) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing) + Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.default) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Headers=Headers.Detect_Headers) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (on_invalid_rows:Invalid_Rows=Invalid_Rows.Add_Extra_Columns) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing) ## PRIVATE Resolve an unresolved constructor to the actual type. @@ -125,8 +125,8 @@ type Delimited_Format ## PRIVATE Clone the instance with some properties overridden. clone : Encoding -> Quote_Style -> Headers -> (Data_Formatter|Nothing) -> Boolean -> (Text|Nothing) -> (Text|Nothing) -> Delimited_Format - clone self (encoding:Encoding = self.encoding) (quote_style:Quote_Style=self.quote_style) (headers:Headers=self.headers) (value_formatter=self.value_formatter) (keep_invalid_rows:Boolean=self.keep_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) = - Delimited_Format.Delimited self.delimiter encoding self.skip_rows self.row_limit quote_style headers value_formatter keep_invalid_rows line_endings comment_character + clone self (encoding:Encoding = self.encoding) (quote_style:Quote_Style=self.quote_style) (headers:Headers=self.headers) (value_formatter=self.value_formatter) (on_invalid_rows:Invalid_Rows=self.on_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) = + Delimited_Format.Delimited self.delimiter encoding self.skip_rows self.row_limit quote_style headers value_formatter on_invalid_rows line_endings comment_character ## ICON data_input Create a clone of this with specified quoting settings. @@ -194,7 +194,12 @@ Delimited_Format.from (that : JS_Object) = headers = that.get "headers" |> parse_boolean_with_infer "headers" skip_rows = that.get "skip_rows" . if_nothing 0 row_limit = that.get "row_limit" - keep_invalid_rows = that.get "keep_invalid_rows" . 
if_nothing True
+    on_invalid_rows = case that.get "on_invalid_rows" of
+        True -> Invalid_Rows.Keep_Invalid_Rows
+        False -> Invalid_Rows.Drop_Invalid_Rows
+        "add_extra_columns" -> Invalid_Rows.Add_Extra_Columns
+        Nothing -> Invalid_Rows.Add_Extra_Columns
+        other -> Error.throw (Illegal_Argument.Error "Invalid value for `on_invalid_rows`: "+other.to_display_text)
     quote_style = case that.get "quote_style" of
         Nothing -> Quote_Style.With_Quotes
         json -> Quote_Style.from json
@@ -202,6 +207,6 @@ Delimited_Format.from (that : JS_Object) =
     unsupported_fields = ["value_formatter", "line_endings", "comment_character"]
     case unsupported_fields.find that.contains_key if_missing=Nothing of
         Nothing ->
-            Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit quote_style=quote_style keep_invalid_rows=keep_invalid_rows
+            Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit quote_style=quote_style on_invalid_rows=on_invalid_rows
         field -> Error.throw (Illegal_Argument.Error ("The field `" ++ field ++ "` is currently not supported when deserializing the Delimited format from JSON."))
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Invalid_Rows.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Invalid_Rows.enso
new file mode 100644
index 000000000000..2cc0fbcf4b30
--- /dev/null
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Invalid_Rows.enso
@@ -0,0 +1,25 @@
+from Standard.Base import Boolean, False, True
+
+## Describes what to do with rows that have an unexpected number of columns.
+type Invalid_Rows
+    ## Rows that contain too few or too many columns are dropped.
+    Drop_Invalid_Rows
+
+    ## Rows that contain too few or too many columns are kept.
+
+       - If a row has too few columns, the missing columns are filled with `Nothing`.
+       - If it has too many, the extra columns are dropped.
+    Keep_Invalid_Rows
+
+    ## Rows that contain more columns than expected are kept, and the extra columns are added.
+
+       - If a row has too few columns, the missing columns are filled with `Nothing`.
+       - If it has too many, the extra columns are kept. The previous rows that
+         had fewer columns are filled with `Nothing`.
+    Add_Extra_Columns
+
+## PRIVATE
+   A conversion for backward compatibility.
+Invalid_Rows.from (that : Boolean) = case that of + True -> Invalid_Rows.Keep_Invalid_Rows + False -> Invalid_Rows.Drop_Invalid_Rows diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Reader.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Reader.enso index 077455a2029a..6e257482e8e8 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Reader.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Reader.enso @@ -6,6 +6,7 @@ import Standard.Base.System.Input_Stream.Input_Stream import project.Data_Formatter.Data_Formatter import project.Delimited.Delimited_Format.Delimited_Format +import project.Delimited.Invalid_Rows.Invalid_Rows import project.Delimited.Quote_Style.Quote_Style import project.Headers.Headers import project.Internal.Java_Problems @@ -25,6 +26,7 @@ polyglot java import org.enso.table.parsing.problems.MismatchedQuote polyglot java import org.enso.table.parsing.TypeInferringParser polyglot java import org.enso.table.read.DelimitedReader polyglot java import org.enso.table.read.DelimitedReader.HeaderBehavior +polyglot java import org.enso.table.read.DelimitedReader.InvalidRowsBehavior polyglot java import org.enso.table.read.ParsingFailedException polyglot java import org.enso.table.read.QuoteStrippingParser @@ -124,9 +126,13 @@ prepare_reader format:Delimited_Format max_columns on_problems:Problem_Behavior newline = newline_override.if_nothing <| case format.line_endings of Infer -> Nothing endings -> endings.to_text + on_invalid_rows_java = case format.on_invalid_rows of + Invalid_Rows.Keep_Invalid_Rows -> InvalidRowsBehavior.KEEP + Invalid_Rows.Drop_Invalid_Rows -> InvalidRowsBehavior.DROP + Invalid_Rows.Add_Extra_Columns -> InvalidRowsBehavior.ADD_EXTRA_COLUMNS warnings_as_errors = on_problems == Problem_Behavior.Report_Error - DelimitedReader.new format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows newline format.comment_character warnings_as_errors java_problem_aggregator + DelimitedReader.new format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser on_invalid_rows_java newline format.comment_character warnings_as_errors java_problem_aggregator ## PRIVATE An internal type representing columns deduced from an existing file. @@ -134,7 +140,11 @@ type Detected_Headers ## Represents the headers found in the file. Existing (column_names : Vector Text) - ## Indicates that the file exists but no headers have been found, so only positional column matching is possible. + ## Indicates that the file exists but no headers have been found, + so only positional column matching is possible. + + Note that the file may still contain rows that have less or more columns + than specified here. This column count is only based on the first row. 
None (column_count : Integer) ## PRIVATE diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 58e35d3d73d6..cd251dddd4ac 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -52,6 +52,8 @@ public Table(Column[] columns) { throw new IllegalArgumentException("Column names must be unique within a Table."); } + assert checkAllColumnsHaveSameSize(columns) : "All columns must have the same row count."; + this.columns = columns; } @@ -67,6 +69,17 @@ private static boolean checkUniqueColumns(Column[] columns) { return true; } + private static boolean checkAllColumnsHaveSameSize(Column[] columns) { + int size = columns[0].getSize(); + for (Column column : columns) { + if (column.getSize() != size) { + return false; + } + } + + return true; + } + /** * @return the number of rows in this table */ diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedFileMetadata.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedFileMetadata.java index 98b2e6947e2d..d39848643f03 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedFileMetadata.java +++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedFileMetadata.java @@ -1,5 +1,7 @@ package org.enso.table.read; +import java.util.List; + /** * Metadata that can be detected by the DelimitedReader. * @@ -14,6 +16,6 @@ */ public record DelimitedFileMetadata( long columnCount, - String[] definedColumnNames, + List definedColumnNames, boolean hasAnyContent, String effectiveLineSeparator) {} diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java index 5dd11ff52b72..eef35ce58326 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java @@ -60,7 +60,7 @@ public class DelimitedReader { private final CsvParser parser; private final DatatypeParser valueParser; private final TypeInferringParser cellTypeGuesser; - private final boolean keepInvalidRows; + private final InvalidRowsBehavior keepInvalidRows; private String newlineSetting; private final NoOpParseProblemAggregator noOpProblemAggregator = new NoOpParseProblemAggregator(); private long targetTableIndex = 0; @@ -69,6 +69,7 @@ public class DelimitedReader { private long currentLine = 0; private List> builders = null; + private int initialColumnCount = 0; private final DelimitedReaderProblemAggregator problemAggregator; /** @@ -111,7 +112,7 @@ public DelimitedReader( int maxColumns, DatatypeParser valueParser, TypeInferringParser cellTypeGuesser, - boolean keepInvalidRows, + InvalidRowsBehavior keepInvalidRows, String newline, String commentCharacter, boolean warningsAsErrors, @@ -266,17 +267,36 @@ private void appendRow(String[] row) { assert canFitMoreRows(); if (row.length != builders.size()) { - problemAggregator.reportInvalidRow( - currentLine, keepInvalidRows ? targetTableIndex : null, row, builders.size()); + boolean isRowKept = + switch (keepInvalidRows) { + case DROP -> false; + case KEEP, ADD_EXTRA_COLUMNS -> true; + }; + + // The error is only reported if the column count does not match the initial column count. 
+ // Otherwise, a single row with more columns in ADD_EXTRA_COLUMNS mode will expand the + // builders and all subsequent rows (that had original column count) would turn into warnings. + // Such flood of warnings is not useful. Instead, we only warn on the occurrences that expand + // the column count, or that have fewer columns than originally expected. + if (row.length != initialColumnCount) { + problemAggregator.reportInvalidRow( + currentLine, isRowKept ? targetTableIndex : null, row, builders.size()); + } + + if (isRowKept) { + // If the current row had more columns than expected, they are either discarded or added as + // extra columns. + if (keepInvalidRows == InvalidRowsBehavior.ADD_EXTRA_COLUMNS + && row.length > builders.size()) { + addExtraColumns(row.length - builders.size()); + } - if (keepInvalidRows) { for (int i = 0; i < builders.size() && i < row.length; i++) { builders.get(i).append(row[i]); } // If the current row had fewer columns than expected, nulls are inserted for the missing // values. - // If it had more columns, the excess columns are discarded. for (int i = row.length; i < builders.size(); i++) { builders.get(i).appendNulls(1); } @@ -292,6 +312,17 @@ private void appendRow(String[] row) { } } + private void addExtraColumns(int count) { + for (int i = 0; i < count; i++) { + int columnIndex = builders.size() + 1; + effectiveColumnNames.add(COLUMN_NAME + " " + columnIndex); + var builder = constructBuilder(targetTableIndex); + // We ensure the new builder has the same length as the previous ones by padding with nulls. + builder.appendNulls(Math.toIntExact(targetTableIndex)); + builders.add(builder); + } + } + private boolean canFitMoreRows() { return rowLimit < 0 || targetTableIndex < rowLimit; } @@ -324,7 +355,7 @@ private boolean isPlainText(String cell) { } /** The column names as defined in the input (if applicable, otherwise null). */ - private String[] definedColumnNames = null; + private List definedColumnNames = null; /** * The effective column names. @@ -332,10 +363,10 @@ private boolean isPlainText(String cell) { *
If {@code GENERATE_HEADERS} is used or if {@code INFER} is used and no headers are found, * this will be populated with automatically generated column names. */ - private String[] effectiveColumnNames; + private List effectiveColumnNames; private int getColumnCount() { - return effectiveColumnNames.length; + return effectiveColumnNames.size(); } /** @@ -380,7 +411,7 @@ private void detectHeaders() { } if (firstRow == null) { - effectiveColumnNames = new String[0]; + effectiveColumnNames = List.of(); return; } @@ -423,9 +454,11 @@ private void detectHeaders() { default -> throw new IllegalStateException("Impossible branch."); } - effectiveColumnNames = headerNames.toArray(new String[0]); + effectiveColumnNames = headerNames; if (wereHeadersDefined) { - definedColumnNames = effectiveColumnNames; + // We need a copy of the defined column names, as the effective column names may be modified + // later. + definedColumnNames = new ArrayList<>(effectiveColumnNames); } } @@ -445,6 +478,7 @@ public Table read(Reader input) { throw new EmptyFileException(); } + initialColumnCount = columnCount; initBuilders(columnCount); while (canFitMoreRows()) { var currentRow = readNextRow(); @@ -461,7 +495,7 @@ public Table read(Reader input) { Column[] columns = new Column[builders.size()]; for (int i = 0; i < builders.size(); i++) { - String columnName = effectiveColumnNames[i]; + String columnName = effectiveColumnNames.get(i); var stringStorage = builders.get(i).seal(); // We don't expect InvalidFormat to be propagated back to Enso, there is no particular type @@ -493,10 +527,14 @@ private void markUsed() { private void initBuilders(int count) { builders = new ArrayList<>(count); for (int i = 0; i < count; i++) { - builders.add(Builder.getForText(TextType.VARIABLE_LENGTH, INITIAL_ROW_CAPACITY)); + builders.add(constructBuilder(INITIAL_ROW_CAPACITY)); } } + private BuilderForType constructBuilder(long initialCapacity) { + return Builder.getForText(TextType.VARIABLE_LENGTH, initialCapacity); + } + /** Specifies how to set the headers for the returned table. */ public enum HeaderBehavior { /** Tries to infer if the headers are present in the file. */ @@ -510,4 +548,16 @@ public enum HeaderBehavior { */ GENERATE_HEADERS } + + /** Specifies how to handle rows with unexpected number of columns. */ + public enum InvalidRowsBehavior { + /** Discards rows with unexpected number of columns. */ + DROP, + + /** Keeps rows with unexpected number of columns, but the additional columns are discarded. */ + KEEP, + + /** Keeps rows with unexpected number of columns, adding extra columns. */ + ADD_EXTRA_COLUMNS + } } diff --git a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso index c0dc864aa6cc..c14fbab5908a 100644 --- a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso +++ b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso @@ -283,9 +283,65 @@ add_specs suite_builder = t3.column_names.should_equal ["A", "B", "C"] t3.print - group_builder.specify "should handle too long and too short rows" <| - action keep_invalid_rows on_problems = - Data.read (enso_project.data / "varying_rows.csv") (..Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems=on_problems + group_builder.specify "should default to adding extra columns if rows longer than first one appear, and padding shorter with Nothing" <| + r2 = Data.read (enso_project.data / "varying_rows2.csv") (..Delimited "," headers=..Has_Headers value_formatter=Nothing) + r2.column_names . 
should_equal ["a", "b", "c", "Column 4"] + r2.at "a" . to_vector . should_equal ["1", "0", "4"] + r2.at "b" . to_vector . should_equal ["2", "0", "5"] + r2.at "c" . to_vector . should_equal ["3", "0", "6"] + r2.at "Column 4" . to_vector . should_equal [Nothing, "10", Nothing] + + # If there are no extra columns, shorter rows are padded with Nothing like in `Keep_Invalid_Rows`. + r3 = Data.read (enso_project.data / "varying_rows3.csv") (..Delimited "," headers=..Has_Headers value_formatter=Nothing) + r3.column_names . should_equal ["a", "b", "c"] + r3.at "a" . to_vector . should_equal ["1", "0", "4"] + r3.at "b" . to_vector . should_equal ["2", "0", "5"] + r3.at "c" . to_vector . should_equal ["3", Nothing, "6"] + + r1 = Data.read (enso_project.data / "varying_rows.csv") (..Delimited "," headers=..Has_Headers value_formatter=Nothing) + r1.column_names . should_equal ["a", "b", "c", "Column 4", "Column 5", "Column 6", "Column 7", "Column 8"] + r1.at "a" . to_vector . should_equal ["1", "1", "1", Nothing, "1", "1"] + r1.at "b" . to_vector . should_equal ["2", "2", "2", Nothing, Nothing, "2"] + r1.at "c" . to_vector . should_equal ["3", "3", Nothing, Nothing, Nothing, "3"] + r1.at "Column 4" . to_vector . should_equal ["4", Nothing, Nothing, Nothing, Nothing, "4"] + r1.at "Column 5" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, "5"] + r1.at "Column 6" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, "6"] + r1.at "Column 7" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, "7"] + r1.at "Column 8" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, "8"] + + # After the first row that expands the expected column count, later the expected column count is at 4. + # But the second row that has 3 columns does not report a warning, because that was the original expected column count. + # This is so that if a big file contains a single row with more columns, all subsequent colunns that contain the _original_ number of columns do not raise superficious errors. + expected_problems = [Invalid_Row.Error 2 0 ['1', '2', '3', '4'] 3, Invalid_Row.Error 4 2 ['1', '2'] 4, Invalid_Row.Error 5 3 [Nothing] 4, Invalid_Row.Error 6 4 ['1'] 4, Invalid_Row.Error 7 5 ['1', '2', '3', '4', '5', '6', '7', '8'] 4] + Problems.get_attached_warnings r1 . should_equal_ignoring_order expected_problems + + group_builder.specify "should allow to explicitly choose to add extra columns on longer rows" <| + r2 = Data.read (enso_project.data / "varying_rows2.csv") (..Delimited "," headers=..Has_Headers on_invalid_rows=..Add_Extra_Columns value_formatter=Nothing) + r2.column_names . should_equal ["a", "b", "c", "Column 4"] + r2.at "a" . to_vector . should_equal ["1", "0", "4"] + r2.at "b" . to_vector . should_equal ["2", "0", "5"] + r2.at "c" . to_vector . should_equal ["3", "0", "6"] + r2.at "Column 4" . to_vector . should_equal [Nothing, "10", Nothing] + + group_builder.specify "should still be able to infer headers with column count mismatch at first lines" <| + r1 = Data.read (enso_project.data / "varying_rows.csv") (..Delimited "," headers=..Detect_Headers row_limit=(..First 3) value_formatter=Nothing) + r1.column_names . should_equal ["a", "b", "c", "Column 4"] + r1.at "a" . to_vector . should_equal ["1", "1", "1"] + r1.at "b" . to_vector . should_equal ["2", "2", "2"] + r1.at "c" . to_vector . should_equal ["3", "3", Nothing] + r1.at "Column 4" . to_vector . 
should_equal ["4", Nothing, Nothing] + + group_builder.specify "should correctly read no-headers with varying column count and Add_Extra_Columns" <| + r1 = Data.read (enso_project.data / "varying_rows.csv") (..Delimited "," headers=..No_Headers row_limit=(..First 3) on_invalid_rows=..Add_Extra_Columns value_formatter=Nothing) + r1.column_names.should_equal ["Column 1", "Column 2", "Column 3", "Column 4"] + r1.at "Column 1" . to_vector . should_equal ["a", "1", "1"] + r1.at "Column 2" . to_vector . should_equal ["b", "2", "2"] + r1.at "Column 3" . to_vector . should_equal ["c", "3", "3"] + r1.at "Column 4" . to_vector . should_equal [Nothing, "4", Nothing] + + group_builder.specify "should allow to decide to drop or keep rows with too many or too few columns, dropping excess columns" <| + action on_invalid_rows on_problems = + Data.read (enso_project.data / "varying_rows.csv") (..Delimited "," headers=..Has_Headers on_invalid_rows=on_invalid_rows value_formatter=Nothing) on_problems=on_problems tester_kept table = table.columns.map .name . should_equal ['a', 'b', 'c'] @@ -293,7 +349,7 @@ add_specs suite_builder = table.at 'b' . to_vector . should_equal ['2', '2', '2', Nothing, Nothing, '2'] table.at 'c' . to_vector . should_equal ['3', '3', Nothing, Nothing, Nothing, '3'] problems_kept = [Invalid_Row.Error 2 0 ['1', '2', '3', '4'] 3, Invalid_Row.Error 4 2 ['1', '2'] 3, Invalid_Row.Error 5 3 [Nothing] 3, Invalid_Row.Error 6 4 ['1'] 3, Invalid_Row.Error 7 5 ['1', '2', '3', '4', '5', '6', '7', '8'] 3] - Problems.test_problem_handling (action keep_invalid_rows=True) problems_kept tester_kept + Problems.test_problem_handling (action on_invalid_rows=..Keep_Invalid_Rows) problems_kept tester_kept tester_dropped table = table.columns.map .name . should_equal ['a', 'b', 'c'] @@ -301,21 +357,36 @@ add_specs suite_builder = table.at 'b' . to_vector . should_equal ['2'] table.at 'c' . to_vector . should_equal ['3'] problems_dropped = [Invalid_Row.Error 2 Nothing ['1', '2', '3', '4'] 3, Invalid_Row.Error 4 Nothing ['1', '2'] 3, Invalid_Row.Error 5 Nothing [Nothing] 3, Invalid_Row.Error 6 Nothing ['1'] 3, Invalid_Row.Error 7 Nothing ['1', '2', '3', '4', '5', '6', '7', '8'] 3] - Problems.test_problem_handling (action keep_invalid_rows=False) problems_dropped tester_dropped + Problems.test_problem_handling (action on_invalid_rows=..Drop_Invalid_Rows) problems_dropped tester_dropped - r2 = Data.read (enso_project.data / "varying_rows2.csv") (..Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) + r2 = Data.read (enso_project.data / "varying_rows2.csv") (..Delimited "," headers=..Has_Headers on_invalid_rows=..Drop_Invalid_Rows value_formatter=Nothing) r2.column_names . should_equal ['a', 'b', 'c'] - Problems.expect_only_warning (Invalid_Row.Error 3 Nothing ['0', '0', '0', '10'] 3) r2 - warning2 = Problems.get_attached_warnings r2 . first + warning2 = Problems.expect_only_warning (Invalid_Row.Error 3 Nothing ['0', '0', '0', '10'] 3) r2 warning2.to_display_text . should_equal "The row (line 3) had too many columns (expected 3, got 4)." r2.at 'a' . to_vector . should_equal ['1', '4'] r2.at 'b' . to_vector . should_equal ['2', '5'] r2.at 'c' . to_vector . should_equal ['3', '6'] - r3 = Data.read (enso_project.data / "varying_rows3.csv") (..Delimited "," headers=True keep_invalid_rows=True value_formatter=Nothing) + r3 = Data.read (enso_project.data / "varying_rows3.csv") (..Delimited "," headers=..Has_Headers on_invalid_rows=..Keep_Invalid_Rows value_formatter=Nothing) r3.column_names . 
should_equal ['a', 'b', 'c'] - Problems.expect_only_warning (Invalid_Row.Error 3 1 ['0', '0'] 3) r3 - warning3 = Problems.get_attached_warnings r3 . first + warning3 = Problems.expect_only_warning (Invalid_Row.Error 3 1 ['0', '0'] 3) r3 + warning3.to_display_text . should_equal "The row (line 3, table row 1) had too few columns (expected 3, got 2)." + r3.at 'a' . to_vector . should_equal ['1', '0', '4'] + r3.at 'b' . to_vector . should_equal ['2', '0', '5'] + r3.at 'c' . to_vector . should_equal ['3', Nothing, '6'] + + group_builder.specify "should allow Boolean arguments for on_invalid_rows (backwards compatibility)" <| + r2 = Data.read (enso_project.data / "varying_rows2.csv") (..Delimited "," headers=True on_invalid_rows=False value_formatter=Nothing) + r2.column_names . should_equal ['a', 'b', 'c'] + warning2 = Problems.expect_only_warning Invalid_Row r2 + warning2.to_display_text . should_equal "The row (line 3) had too many columns (expected 3, got 4)." + r2.at 'a' . to_vector . should_equal ['1', '4'] + r2.at 'b' . to_vector . should_equal ['2', '5'] + r2.at 'c' . to_vector . should_equal ['3', '6'] + + r3 = Data.read (enso_project.data / "varying_rows3.csv") (..Delimited "," headers=True on_invalid_rows=True value_formatter=Nothing) + r3.column_names . should_equal ['a', 'b', 'c'] + warning3 = Problems.expect_only_warning Invalid_Row r3 warning3.to_display_text . should_equal "The row (line 3, table row 1) had too few columns (expected 3, got 2)." r3.at 'a' . to_vector . should_equal ['1', '0', '4'] r3.at 'b' . to_vector . should_equal ['2', '0', '5'] @@ -323,7 +394,7 @@ add_specs suite_builder = group_builder.specify "should aggregate invalid rows over some limit" <| action on_problems = - Data.read (enso_project.data / "many_invalid_rows.csv") (..Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems + Data.read (enso_project.data / "many_invalid_rows.csv") (..Delimited "," headers=True on_invalid_rows=False value_formatter=Nothing) on_problems tester table = table.columns.map .name . should_equal ['a', 'b', 'c']
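
Illustrative usage of the `on_invalid_rows` option introduced in this patch — a minimal sketch only; the file name "data.csv" is hypothetical and not part of the patch, and the behaviour shown follows the changelog entry and the `Invalid_Rows` documentation above:

    from Standard.Base import all
    from Standard.Table import all

    main =
        # Default (`..Add_Extra_Columns`): a row longer than the first one adds
        # extra columns ("Column 4", ...); shorter rows are padded with Nothing.
        t1 = Data.read "data.csv" (..Delimited "," headers=..Has_Headers)

        # Old `keep_invalid_rows=True` behaviour: keep mismatched rows, pad short
        # ones with Nothing, and discard any excess columns.
        t2 = Data.read "data.csv" (..Delimited "," headers=..Has_Headers on_invalid_rows=..Keep_Invalid_Rows)

        # Old `keep_invalid_rows=False` behaviour: drop rows whose column count
        # does not match the first row.
        t3 = Data.read "data.csv" (..Delimited "," headers=..Has_Headers on_invalid_rows=..Drop_Invalid_Rows)

        [t1.column_names, t2.column_names, t3.column_names]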