From 9d4b110bc61bf42e8877a8198e1dbb4590156c09 Mon Sep 17 00:00:00 2001 From: Jon Degenhardt Date: Thu, 4 Mar 2021 03:27:24 -0800 Subject: [PATCH] Line buffering updates. General fixes, tsv-select support. (#334) --- bash_completion/tsv-utils | 4 +- common/src/tsv_utils/common/utils.d | 99 +++++++++++++++++++------ csv2tsv/src/tsv_utils/csv2tsv.d | 2 +- csv2tsv/tests/gold/error_tests_1.txt | 6 +- tsv-filter/src/tsv_utils/tsv-filter.d | 6 +- tsv-filter/tests/gold/error_tests_1.txt | 8 +- tsv-join/tests/gold/error_tests_1.txt | 2 +- tsv-sample/tests/gold/error_tests_1.txt | 4 +- tsv-sample/tests/gold/error_tests_2.txt | 2 +- tsv-select/src/tsv_utils/tsv-select.d | 32 +++++--- tsv-select/tests/gold/error_tests_1.txt | 12 +-- tsv-uniq/tests/gold/error_tests_1.txt | 4 +- 12 files changed, 123 insertions(+), 58 deletions(-) diff --git a/bash_completion/tsv-utils b/bash_completion/tsv-utils index 3020640f..697def0f 100644 --- a/bash_completion/tsv-utils +++ b/bash_completion/tsv-utils @@ -98,7 +98,7 @@ _tsv_filter() COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" - opts="--help --help-verbose --help-fields --help-options --version --header --or --invert --count --delimiter --empty --not-empty --blank --not-blank --is-numeric --is-finite --is-nan --is-infinity --le --lt --ge --gt --eq --ne --str-le --str-lt --str-ge --str-gt --str-eq --istr-eq --str-ne --istr-ne --str-in-fld --istr-in-fld --str-not-in-fld --istr-not-in-fld --regex --iregex --not-regex --not-iregex --char-len-le --char-len-lt --char-len-ge --char-len-gt --char-len-eq --char-len-ne --byte-len-le --byte-len-lt --byte-len-ge --byte-len-gt --byte-len-eq --byte-len-ne --ff-le --ff-lt --ff-ge --ff-gt --ff-eq --ff-ne --ff-str-eq --ff-istr-eq --ff-str-ne --ff-istr-ne --ff-absdiff-le --ff-absdiff-gt ff-reldiff-le --ff-reldiff-gt" + opts="--help --help-verbose --help-fields --help-options --version --header --or --invert --count --delimiter --line-buffered --empty --not-empty --blank 
--not-blank --is-numeric --is-finite --is-nan --is-infinity --le --lt --ge --gt --eq --ne --str-le --str-lt --str-ge --str-gt --str-eq --istr-eq --str-ne --istr-ne --str-in-fld --istr-in-fld --str-not-in-fld --istr-not-in-fld --regex --iregex --not-regex --not-iregex --char-len-le --char-len-lt --char-len-ge --char-len-gt --char-len-eq --char-len-ne --byte-len-le --byte-len-lt --byte-len-ge --byte-len-gt --byte-len-eq --byte-len-ne --ff-le --ff-lt --ff-ge --ff-gt --ff-eq --ff-ne --ff-str-eq --ff-istr-eq --ff-str-ne --ff-istr-ne --ff-absdiff-le --ff-absdiff-gt ff-reldiff-le --ff-reldiff-gt" # Options requiring an argument or precluding other options case $prev in @@ -206,7 +206,7 @@ _tsv_select() COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" - opts="--help --help-verbose --help-fields --version --header --fields --exclude --rest --delimiter" + opts="--help --help-verbose --help-fields --version --header --fields --exclude --rest --delimiter --line-buffered" # Options requiring an argument or precluding other options # Options with a restricted set of arguments (ie. -r|--rest) have their own case clause. diff --git a/common/src/tsv_utils/common/utils.d b/common/src/tsv_utils/common/utils.d index cb1ec797..f16d3384 100644 --- a/common/src/tsv_utils/common/utils.d +++ b/common/src/tsv_utils/common/utils.d @@ -399,8 +399,9 @@ constructor when mixing specific setting with defaults. */ enum BufferedOutputRangeDefaults { - reserveSize = 11264, flushSize = 10240, + lineBufferedFlushSize = 1, + reserveSize = 11264, maxSize = 4194304 } @@ -416,8 +417,12 @@ lines, as it blocks many writes together in a single write. The internal buffer is written to the output stream after flushSize has been reached. This is checked at newline boundaries, when appendln is called or when put is called with a single newline character. Other writes check maxSize, which is used to avoid -runaway buffers. 
An implication is that line buffering can be achieved on by specifying -flushsize as 1. +runaway buffers. + +This scheme only flushes the internal buffer, it does not flush the output stream. +Use flush() to flush both the internal buffer and the output stream. Specify flushSize +as BufferedOutputRangeDefaults.lineBufferedFlushSize in the constructor to get line +buffering with immediate flushes to the output stream. BufferedOutputRange has a put method allowing it to be used a range. It has a number of other methods providing additional control. @@ -437,13 +442,12 @@ $(LIST * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`. For reasons that are not clear, joiner is quite slow. - * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been - reached. - - * `flush()` - Write the internal buffer to the output stream. + * `flush()` - Write the internal buffer to the output stream and flush the output stream. * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single newline character, '\n' or "\n". + + * `flushBuffer()` - Private. Writes the internal buffer to the output stream; the output stream itself is flushed only when line buffering is enabled. ) The internal buffer is automatically flushed when the BufferedOutputRange goes out of @@ -489,25 +493,40 @@ if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, ch flush(); } - void flush() + private void flushBuffer() { - static if (isFileHandle!OutputTarget) _outputTarget.rawWrite(_outputBuffer.data); + static if (isFileHandle!OutputTarget) + { + _outputTarget.rawWrite(_outputBuffer.data); + + if (_flushSize == BufferedOutputRangeDefaults.lineBufferedFlushSize) + { + _outputTarget.flush(); + } + } else _outputTarget.put(_outputBuffer.data); _outputBuffer.clear; } - bool flushIfFull() + void flush() + { + flushBuffer(); + static if (isFileHandle!OutputTarget) _outputTarget.flush(); + } + + /* flushIfFull flushes the internal buffer if flushSize has been reached. 
*/ + private bool flushIfFull() { bool isFull = _outputBuffer.data.length >= _flushSize; - if (isFull) flush(); + if (isFull) flushBuffer(); return isFull; } /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */ - void flushIfMaxSize() + private void flushIfMaxSize() { - if (_outputBuffer.data.length >= _maxSize) flush(); + if (_outputBuffer.data.length >= _maxSize) flushBuffer(); } /* maybeFlush is intended for the case where put is called with a trailing newline. @@ -525,7 +544,6 @@ if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, ch return doFlush; } - private void appendRaw(T)(T stuff) pure { import std.range : rangePut = put; @@ -869,17 +887,17 @@ if (is(Char == char) || is(Char == ubyte)) * - _lineEnd - End of current line. */ private File _file; + private immutable LineBuffered _lineBuffered; private ubyte[] _buffer; private size_t _lineStart = 0; private size_t _lineEnd = 0; private size_t _dataEnd = 0; - private LineBuffered _lineBuffered; this (File f, LineBuffered lineBuffered) { _file = f; - _buffer = new ubyte[readSize + growSize]; _lineBuffered = lineBuffered; + _buffer = new ubyte[readSize + growSize]; } bool empty() const pure @@ -2036,10 +2054,10 @@ byLineSourceRange is a helper function for creating new byLineSourceRange object */ auto byLineSourceRange( KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n') -(string[] filepaths) +(string[] filepaths, LineBuffered lineBuffered = No.lineBuffered) if (is(Char == char) || is(Char == ubyte)) { - return new ByLineSourceRange!(keepTerminator, Char, terminator)(filepaths); + return new ByLineSourceRange!(keepTerminator, Char, terminator)(filepaths, lineBuffered); } /** @@ -2077,16 +2095,18 @@ if (is(Char == char) || is(Char == ubyte)) alias ByLineSourceType = ByLineSource!(keepTerminator, char, terminator); private string[] _filepaths; + private immutable LineBuffered _lineBuffered; private ByLineSourceType _front; - 
this(string[] filepaths) + this(string[] filepaths, LineBuffered lineBuffered = No.lineBuffered) { _filepaths = filepaths.dup; + _lineBuffered = lineBuffered; _front = null; if (!_filepaths.empty) { - _front = new ByLineSourceType(_filepaths.front); + _front = new ByLineSourceType(_filepaths.front, _lineBuffered); _front.open; _filepaths.popFront; } @@ -2116,7 +2136,7 @@ if (is(Char == char) || is(Char == ubyte)) if (!_filepaths.empty) { - _front = new ByLineSourceType(_filepaths.front); + _front = new ByLineSourceType(_filepaths.front, _lineBuffered); _front.open; _filepaths.popFront; } @@ -2162,15 +2182,17 @@ if (is(Char == char) || is(Char == ubyte)) alias ByLineType = ReturnType!newByLineFn; private immutable string _filepath; + private immutable LineBuffered _lineBuffered; private immutable bool _isStdin; private bool _isOpen; private bool _hasBeenOpened; private File _file; private ByLineType _byLineRange; - private this(string filepath) pure nothrow @safe + private this(string filepath, LineBuffered lineBuffered = No.lineBuffered) pure nothrow @safe { _filepath = filepath; + _lineBuffered = lineBuffered; _isStdin = filepath == "-"; _isOpen = false; _hasBeenOpened = false; @@ -2229,7 +2251,7 @@ if (is(Char == char) || is(Char == ubyte)) assert(!_hasBeenOpened); _file = isStdin ? stdin : _filepath.File("rb"); - _byLineRange = newByLineFn(_file); + _byLineRange = newByLineFn(_file, _lineBuffered); _isOpen = true; _hasBeenOpened = true; } @@ -2363,6 +2385,37 @@ unittest /* The ByLineSourceRange is a reference range, consumed by the foreach. */ assert(inputSourcesYesTerminator.empty); + + /* Using Yes.keepTerminator, Yes.lineBuffered. */ + readSourcesYesTerminator.clear; + auto inputSourcesYesTerminatorYesLineBuffered = + byLineSourceRange!(Yes.keepTerminator)(inputFiles[0 .. 
numFiles], Yes.lineBuffered); + assert(inputSourcesYesTerminatorYesLineBuffered.length == numFiles); + + foreach(fileNum, source; inputSourcesYesTerminatorYesLineBuffered.enumerate) + { + readSourcesYesTerminator.put(source); + assert(source.isOpen); + assert(source._file.isOpen); + assert(readSourcesYesTerminator.data[0 .. fileNum].all!(s => !s.isOpen)); + assert(readSourcesYesTerminator.data[fileNum].isOpen); + + assert(source.byLine.empty || source.byLine.front == fileHeaders[fileNum]); + + assert(source.name == inputFiles[fileNum]); + assert(!source.isStdin); + + auto readFileData = appender!(char[]); + foreach(line; source.byLine) + { + readFileData.put(line); + } + + assert(readFileData.data == fileData[fileNum]); + } + + /* The ByLineSourceRange is a reference range, consumed by the foreach. */ + assert(inputSourcesYesTerminatorYesLineBuffered.empty); } /* Empty filelist. */ diff --git a/csv2tsv/src/tsv_utils/csv2tsv.d b/csv2tsv/src/tsv_utils/csv2tsv.d index 9404a47f..5dd0c044 100644 --- a/csv2tsv/src/tsv_utils/csv2tsv.d +++ b/csv2tsv/src/tsv_utils/csv2tsv.d @@ -187,7 +187,7 @@ else catch (Exception exc) { writeln(); - stdin.flush(); + stdout.flush(); stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg); return 1; } diff --git a/csv2tsv/tests/gold/error_tests_1.txt b/csv2tsv/tests/gold/error_tests_1.txt index 9ddf4bd1..23980a7c 100644 --- a/csv2tsv/tests/gold/error_tests_1.txt +++ b/csv2tsv/tests/gold/error_tests_1.txt @@ -2,8 +2,8 @@ Error test set 1 ---------------- ====[csv2tsv nosuchfile.txt]==== -Error [csv2tsv]: Cannot open file `nosuchfile.txt' in mode `rb' (No such file or directory) +Error [csv2tsv]: Cannot open file `nosuchfile.txt' in mode `rb' (No such file or directory) ====[csv2tsv --nosuchparam input1.txt]==== [csv2tsv] Error processing command line arguments: Unrecognized option --nosuchparam @@ -60,14 +60,14 @@ Error [csv2tsv]: Cannot open file `nosuchfile.txt' in mode `rb' (No such file or [csv2tsv] Error processing command line 
arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement). ====[csv2tsv invalid1.csv]==== -Error [csv2tsv]: Invalid CSV. Improperly terminated quoted field. File: invalid1.csv, Line: 3 field1 field2 field3 100 ab c de f 200 gh i, +Error [csv2tsv]: Invalid CSV. Improperly terminated quoted field. File: invalid1.csv, Line: 3 ====[csv2tsv invalid2.csv]==== -Error [csv2tsv]: Invalid CSV. Improperly terminated quoted field. File: invalid2.csv, Line: 4 field1 field2 field3 100 ab c de f 200 gh i jk l 300 mn o pq r +Error [csv2tsv]: Invalid CSV. Improperly terminated quoted field. File: invalid2.csv, Line: 4 diff --git a/tsv-filter/src/tsv_utils/tsv-filter.d b/tsv-filter/src/tsv_utils/tsv-filter.d index 67a28e58..d98fc4cb 100644 --- a/tsv-filter/src/tsv_utils/tsv-filter.d +++ b/tsv-filter/src/tsv_utils/tsv-filter.d @@ -1013,8 +1013,9 @@ void tsvFilter(ref TsvFilterOptions cmdopt) /* BufferedOutputRange improves performance on narrow files with high percentages of * writes. */ - immutable size_t flushSize = - cmdopt.lineBuffered ? 1 : BufferedOutputRangeDefaults.reserveSize; + immutable size_t flushSize = cmdopt.lineBuffered ? + BufferedOutputRangeDefaults.lineBufferedFlushSize : + BufferedOutputRangeDefaults.flushSize; auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize); size_t matchedLines = 0; @@ -1086,6 +1087,7 @@ void tsvFilter(ref TsvFilterOptions cmdopt) } catch (Exception e) { + bufferedOutput.flush; throw new Exception( format("Could not process line or field: %s\n File: %s Line: %s%s", e.msg, inputStream.name, lineNum, diff --git a/tsv-filter/tests/gold/error_tests_1.txt b/tsv-filter/tests/gold/error_tests_1.txt index 0a0ce9a4..13131892 100644 --- a/tsv-filter/tests/gold/error_tests_1.txt +++ b/tsv-filter/tests/gold/error_tests_1.txt @@ -20,8 +20,8 @@ Error test set 1 Expected: '--eq :' or '--eq : where is a number. 
====[tsv-filter --header --le 1000:10 input1.tsv]==== -Error [tsv-filter]: Not enough fields in line. File: input1.tsv, Line: 2 F1 F2 F3 F4 +Error [tsv-filter]: Not enough fields in line. File: input1.tsv, Line: 2 ====[tsv-filter --header --le 1: input1.tsv]==== [tsv-filter] Error processing command line arguments: Invalid option: [--le 1:]. No value after field list. @@ -296,12 +296,12 @@ Error [tsv-filter]: Windows/DOS line ending found. Convert file to Unix newlines File: input1_dos.tsv, Line: 1 ====[tsv-filter --header --eq 2:1 input1.tsv input1_dos.tsv]==== -Error [tsv-filter]: Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix'). - File: input1_dos.tsv, Line: 1 F1 F2 F3 F4 1 1.0 a A +Error [tsv-filter]: Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix'). + File: input1_dos.tsv, Line: 1 ====[tsv-filter --str-eq 4:ABC input1.tsv input1_dos.tsv]==== +10 10.1 abc ABC Error [tsv-filter]: Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix'). File: input1_dos.tsv, Line: 1 -10 10.1 abc ABC diff --git a/tsv-join/tests/gold/error_tests_1.txt b/tsv-join/tests/gold/error_tests_1.txt index 4690c266..a85e4c16 100644 --- a/tsv-join/tests/gold/error_tests_1.txt +++ b/tsv-join/tests/gold/error_tests_1.txt @@ -52,8 +52,8 @@ Error [tsv-join]: Not enough fields in line. File: input1.tsv, Line: 1 Error [tsv-join]: Not enough fields in line. File: input1.tsv, Line: 1 ====[tsv-join --header -f input1.tsv -k 4 -d 6 input2.tsv]==== -Error [tsv-join]: Not enough fields in line. File: input2.tsv, Line: 2 f1 f2 f3 f4 f5 +Error [tsv-join]: Not enough fields in line. File: input2.tsv, Line: 2 ====[tsv-join -f input1_noheader.tsv -k 6 input2_noheader.tsv]==== Error [tsv-join]: Not enough fields in line. 
File: input1_noheader.tsv, Line: 1 diff --git a/tsv-sample/tests/gold/error_tests_1.txt b/tsv-sample/tests/gold/error_tests_1.txt index 2e746ef3..842fc4c6 100644 --- a/tsv-sample/tests/gold/error_tests_1.txt +++ b/tsv-sample/tests/gold/error_tests_1.txt @@ -11,9 +11,9 @@ Error test set 1 [tsv-sample] Error processing command line arguments: Invalid UTF-8 sequence (at index 1) ====[tsv-sample -H -w 11 input3x25.tsv]==== +line title weight Error [tsv-sample]: Could not process line: Not enough fields on line. Number required: 11; Number found: 3 File: input3x25.tsv Line: 2 -line title weight ====[tsv-sample -H -w 0 input3x25.tsv]==== [tsv-sample] Error processing command line arguments: [--w|weight-field] Field numbers must be greater than zero: '0'. @@ -70,8 +70,8 @@ line title weight Error [tsv-sample]: Not enough fields in line. File: input4x50.tsv, Line: 1 ====[tsv-sample -H -p 0.5 -k 5 input4x50.tsv input4x15.tsv]==== -Error [tsv-sample]: Not enough fields in line. File: input4x50.tsv, Line: 2 c-1 c-2 c-3 c-4 +Error [tsv-sample]: Not enough fields in line. File: input4x50.tsv, Line: 2 ====[tsv-sample -H -p 0.5 -k no_such_field input4x50.tsv input4x15.tsv]==== [tsv-sample] Error processing command line arguments: [--k|key-fields] Field not found in file header: 'no_such_field'. diff --git a/tsv-sample/tests/gold/error_tests_2.txt b/tsv-sample/tests/gold/error_tests_2.txt index 062c4f96..ba8a508a 100644 --- a/tsv-sample/tests/gold/error_tests_2.txt +++ b/tsv-sample/tests/gold/error_tests_2.txt @@ -2,9 +2,9 @@ Error test set 2 ---------------- ====[tsv-sample -H -w 2 input3x25.tsv]==== +line title weight Error [tsv-sample]: Could not process line: no digits seen for input "Белые ночи". File: input3x25.tsv Line: 2 -line title weight ====[tsv-sample -w 3 input3x25.tsv]==== Error [tsv-sample]: Could not process line: no digits seen for input "weight". 
diff --git a/tsv-select/src/tsv_utils/tsv-select.d b/tsv-select/src/tsv_utils/tsv-select.d index 4ee33186..b67a99ae 100644 --- a/tsv-select/src/tsv_utils/tsv-select.d +++ b/tsv-select/src/tsv_utils/tsv-select.d @@ -141,6 +141,8 @@ Notes: with '--f|fields'. This is not necessary for '--e|exclude' fields. * Specifying names of fields containing special characters may require escaping the special characters. See '--help-fields' for details. +* Output is buffered by default to improve performance. Use + '--line-buffered' to have each line immediately written out. Options: EOS"; @@ -149,7 +151,7 @@ EOS"; */ struct TsvSelectOptions { - import tsv_utils.common.utils : byLineSourceRange, ByLineSourceRange; + import tsv_utils.common.utils : byLineSourceRange, ByLineSourceRange, LineBuffered; // The allowed values for the --rest option. enum RestOption { none, first, last}; @@ -158,6 +160,7 @@ struct TsvSelectOptions ByLineSourceRange!() inputSources; /// Input Files bool hasHeader = false; /// --H|header char delim = '\t'; /// --d|delimiter + bool lineBuffered = false; /// --line-buffered RestOption restArg; /// --rest first|last (none is hidden default) size_t[] fields; /// Derived from --f|fields bool[] excludedFieldsTable; /// Derived. Lookup table for excluded fields. @@ -231,6 +234,10 @@ struct TsvSelectOptions "CHR Character to use as field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim, + "line-buffered", + " Immediately output every line.", + &lineBuffered, + std.getopt.config.caseSensitive, "V|version", " Print version information and exit.", @@ -342,7 +349,8 @@ struct TsvSelectOptions /* * Create the byLineSourceRange and perform header line processing. */ - inputSources = byLineSourceRange(filepaths); + immutable LineBuffered isLineBuffered = lineBuffered ? 
Yes.lineBuffered : No.lineBuffered; + inputSources = byLineSourceRange(filepaths, isLineBuffered); if (hasHeader) { @@ -435,12 +443,17 @@ enum RestLocation { none, first, last }; * instantiates this function three times, once for each of the --rest options. It results * in a larger program, but is faster. Run-time improvements of 25% were measured compared * to the non-templatized version. (Note: 'cte' stands for 'compile time evaluation'.) + * + * Note: tsv-select does not immediately flush the header line like most other tsv-utils + * tools. This is due to a limitation in ByLineSourceRange. It does not read the header + * separately, it waits until the first full buffer is read. For tsv-select this leaves no + * material advantage to flushing the header line early. */ void tsvSelect(RestLocation rest)(ref TsvSelectOptions cmdopt) { - import tsv_utils.common.utils: BufferedOutputRange, ByLineSourceRange, - InputFieldReordering, throwIfWindowsNewline; + import tsv_utils.common.utils: BufferedOutputRange, BufferedOutputRangeDefaults, + ByLineSourceRange, InputFieldReordering, LineBuffered, throwIfWindowsNewline; import std.algorithm: splitter; import std.array : appender, Appender; import std.format: format; @@ -484,7 +497,10 @@ void tsvSelect(RestLocation rest)(ref TsvSelectOptions cmdopt) /* BufferedOutputRange (from common/utils.d) is a performance improvement over * writing directly to stdout. */ - auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout); + immutable size_t flushSize = cmdopt.lineBuffered ? + BufferedOutputRangeDefaults.lineBufferedFlushSize : + BufferedOutputRangeDefaults.flushSize; + auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize); /* Read each input file (or stdin) and iterate over each line. */ @@ -585,12 +601,6 @@ void tsvSelect(RestLocation rest)(ref TsvSelectOptions cmdopt) } bufferedOutput.appendln; - - /* Send the first line of the first file immediately. 
This helps detect - * errors quickly in multi-stage unix pipelines. Note that tsv-select may - * have been sent one line from an upstream process, usually a header line. - */ - if (lineNum == 1 && fileNum == 0) bufferedOutput.flush; } } } diff --git a/tsv-select/tests/gold/error_tests_1.txt b/tsv-select/tests/gold/error_tests_1.txt index 7140f84b..a36c1c38 100644 --- a/tsv-select/tests/gold/error_tests_1.txt +++ b/tsv-select/tests/gold/error_tests_1.txt @@ -20,14 +20,14 @@ Error test set 1 [tsv-select] Error processing command line arguments: Cannot open file `nosuchfile.tsv' in mode `rb' (No such file or directory) ====[tsv-select -f 1 input_3x1.tsv nosuchfile.tsv]==== -Error [tsv-select]: Cannot open file `nosuchfile.tsv' in mode `rb' (No such file or directory) f1 3x1-r1 +Error [tsv-select]: Cannot open file `nosuchfile.tsv' in mode `rb' (No such file or directory) ====[tsv-select -f 1,4 input_3plus_fields.tsv]==== -Error [tsv-select]: Not enough fields in line. File: input_3plus_fields.tsv, Line: 3 1 101 2 5734 +Error [tsv-select]: Not enough fields in line. File: input_3plus_fields.tsv, Line: 3 ====[tsv-select -d ß -f 1 input1.tsv]==== [tsv-select] Error processing command line arguments: Invalid UTF-8 sequence (at index 1) @@ -135,8 +135,6 @@ Error [tsv-select]: Windows/DOS line ending found. Convert file to Unix newlines File: input1_dos.tsv, Line: 1 ====[tsv-select -f 1 input1.tsv input1_dos.tsv]==== -Error [tsv-select]: Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix'). - File: input1_dos.tsv, Line: 1 f1 1 @@ -146,10 +144,10 @@ f1 6 7 8 - -====[tsv-select -H -f 1 input1.tsv input1_dos.tsv]==== Error [tsv-select]: Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix'). File: input1_dos.tsv, Line: 1 + +====[tsv-select -H -f 1 input1.tsv input1_dos.tsv]==== f1 1 @@ -159,3 +157,5 @@ f1 6 7 8 +Error [tsv-select]: Windows/DOS line ending found. 
Convert file to Unix newlines before processing (e.g. 'dos2unix'). + File: input1_dos.tsv, Line: 1 diff --git a/tsv-uniq/tests/gold/error_tests_1.txt b/tsv-uniq/tests/gold/error_tests_1.txt index 3b2e95bd..1412ffd0 100644 --- a/tsv-uniq/tests/gold/error_tests_1.txt +++ b/tsv-uniq/tests/gold/error_tests_1.txt @@ -74,9 +74,9 @@ Error [tsv-uniq]: Not enough fields in line. File: input1.tsv, Line: 1 [tsv-uniq] Error processing command line arguments: --number-header requires --z|number ====[tsv-uniq -H -f 2,30 input1.tsv]==== -Error [tsv-uniq]: Not enough fields in line. File: input1.tsv, Line: 2 f1 f2 f3 f4 f5 +Error [tsv-uniq]: Not enough fields in line. File: input1.tsv, Line: 2 ====[tsv-uniq -H -f 2-30 input1.tsv]==== -Error [tsv-uniq]: Not enough fields in line. File: input1.tsv, Line: 2 f1 f2 f3 f4 f5 +Error [tsv-uniq]: Not enough fields in line. File: input1.tsv, Line: 2