From 27935f0c5e004239ceba8142c4e40f0429784e6d Mon Sep 17 00:00:00 2001 From: Jon Degenhardt Date: Wed, 3 Mar 2021 01:42:07 -0800 Subject: [PATCH] tsv-filter --line-buffered (#333) * [WIP] POC code for --line-buffered. Needs unit tests; bufferedOutputRange support. * [WIP] Switch to output line-buffering via BufferedOutputRange. Some unit test prep. * [WIP] Line buffering: small fixes. * line buffering: Unit tests. * tsv-filter line-buffered unit tests. * tsv-filter line-buffering: Minor code refactor. --- common/src/tsv_utils/common/utils.d | 236 ++++++++++++++++++------ tsv-filter/src/tsv_utils/tsv-filter.d | 39 ++-- tsv-filter/tests/gold/basic_tests_1.txt | 86 +++++++++ tsv-filter/tests/tests.sh | 14 ++ 4 files changed, 300 insertions(+), 75 deletions(-) diff --git a/common/src/tsv_utils/common/utils.d b/common/src/tsv_utils/common/utils.d index 491e605..cb1ec79 100644 --- a/common/src/tsv_utils/common/utils.d +++ b/common/src/tsv_utils/common/utils.d @@ -392,6 +392,18 @@ if (isSomeChar!C) } } +/** +BufferedOutputRangeDefaults defines the parameter defaults used by +BufferedOutputRange. These can be passed to the BufferedOutputRange +constructor when mixing specific settings with defaults. + */ +enum BufferedOutputRangeDefaults +{ + reserveSize = 11264, + flushSize = 10240, + maxSize = 4194304 +} + /** BufferedOutputRange is a performance enhancement over writing directly to an output stream. It holds a File open for write or an OutputRange. Ouput is accumulated in an @@ -404,7 +416,8 @@ lines, as it blocks many writes together in a single write. The internal buffer is written to the output stream after flushSize has been reached. This is checked at newline boundaries, when appendln is called or when put is called with a single newline character. Other writes check maxSize, which is used to avoid -runaway buffers. +runaway buffers. An implication is that line buffering can be achieved by specifying +flushSize as 1.
BufferedOutputRange has a put method allowing it to be used a range. It has a number of other methods providing additional control. @@ -453,19 +466,15 @@ if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, ch } else static assert(false); - private enum defaultReserveSize = 11264; - private enum defaultFlushSize = 10240; - private enum defaultMaxSize = 4194304; - private OutputTarget _outputTarget; private auto _outputBuffer = appender!(C[]); private immutable size_t _flushSize; private immutable size_t _maxSize; this(OutputTarget outputTarget, - size_t flushSize = defaultFlushSize, - size_t reserveSize = defaultReserveSize, - size_t maxSize = defaultMaxSize) + size_t flushSize = BufferedOutputRangeDefaults.flushSize, + size_t reserveSize = BufferedOutputRangeDefaults.reserveSize, + size_t maxSize = BufferedOutputRangeDefaults.maxSize) { assert(flushSize <= maxSize); @@ -824,6 +833,9 @@ enum bool isFlushableOutputRange(R, E=char) = isOutputRange!(R, E) static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])), char)); } +/** Flag accepted by bufferedByLine to use line-buffering. + */ +alias LineBuffered = Flag!"lineBuffered"; /** bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by @@ -836,12 +848,14 @@ rather than a runtime parameter. Reading in blocks does mean that input is not read until a full buffer is available or end-of-file is reached. For this reason, bufferedByLine is not appropriate for -interactive input. +interactive input. Note though that line-buffering can be enabled via +the lineBuffered parameter. In this mode bufferedByLine reads each line as soon as it +is available.
*/ auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16) - (File file) + (File file, LineBuffered lineBuffered = No.lineBuffered) if (is(Char == char) || is(Char == ubyte)) { static assert(0 < growSize && growSize <= readSize); @@ -859,11 +873,13 @@ if (is(Char == char) || is(Char == ubyte)) private size_t _lineStart = 0; private size_t _lineEnd = 0; private size_t _dataEnd = 0; + private LineBuffered _lineBuffered; - this (File f) + this (File f, LineBuffered lineBuffered) { _file = f; _buffer = new ubyte[readSize + growSize]; + _lineBuffered = lineBuffered; } bool empty() const pure @@ -887,12 +903,30 @@ if (is(Char == char) || is(Char == ubyte)) } } - /* Note: Call popFront at initialization to do the initial read. */ void popFront() { - import std.algorithm: copy, find; assert(!empty, "Attempt to popFront an empty bufferedByLine."); + if (!_lineBuffered) popFrontFullBuffered(); + else popFrontLineBuffered(); + } + + private void popFrontLineBuffered() + { + char[] line = cast(char[]) _buffer; + _lineStart = 0; + _lineEnd = _dataEnd = _file.readln(line, cast(dchar) terminator); /* Honor the template's terminator rather than readln's '\n' default. */ + if (line.length > _buffer.length) _buffer = cast(ubyte[]) line; + + assert(_lineEnd == line.length); + assert(_dataEnd == line.length); + } + + /* Note: Call popFront at initialization to do the initial read. */ + private void popFrontFullBuffered() + { + import std.algorithm: copy, find; + /* Pop the current line. */ _lineStart = _lineEnd; @@ -950,7 +984,7 @@ if (is(Char == char) || is(Char == ubyte)) assert(file.isOpen, "bufferedByLine passed a closed file."); - auto r = new BufferedByLineImpl(file); + auto r = new BufferedByLineImpl(file, lineBuffered); if (!r.empty) r.popFront; return r; } @@ -969,8 +1003,9 @@ unittest auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline"); scope(exit) testDir.rmdirRecurse; - /* Create two data files with the same data.
Read both in parallel with byLine and - * bufferedByLine and compare each line. + /* Create three data files with the same data. Read each in parallel with byLine and + * bufferedByLine and compare each line. bufferedByLine is run in both full buffered + * and line buffered modes. */ auto data1 = appender!(char[])(); @@ -985,6 +1020,7 @@ unittest string file1a = buildPath(testDir, "file1a.txt"); string file1b = buildPath(testDir, "file1b.txt"); + string file1c = buildPath(testDir, "file1c.txt"); { auto f1aFH = file1a.File("wb"); f1aFH.write(data1.data); @@ -993,43 +1029,68 @@ unittest auto f1bFH = file1b.File("wb"); f1bFH.write(data1.data); f1bFH.close; + + auto f1cFH = file1c.File("wb"); + f1cFH.write(data1.data); + f1cFH.close; } - /* Default parameters. */ + /* Default template parameters. */ { auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator); - auto f1bIn = f1bFH.byLine(No.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(No.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(No.keepTerminator); + auto f1cIn = f1cFH.bufferedByLine!(No.keepTerminator)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; } { auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator); - auto f1bIn = f1bFH.byLine(Yes.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(Yes.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(Yes.keepTerminator)(No.lineBuffered); + auto f1cIn = f1cFH.bufferedByLine!(Yes.keepTerminator)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn,
StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; } /* Smaller read size. This will trigger buffer growth. */ { auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', 512, 256); - auto f1bIn = f1bFH.byLine(No.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(No.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(No.keepTerminator, char, '\n', 512, 256); + auto f1cIn = f1cFH.bufferedByLine!(No.keepTerminator, char, '\n', 512, 256)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; } /* Exercise boundary cases in buffer growth. @@ -1041,25 +1102,39 @@ unittest {{ auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); - auto f1bIn = f1bFH.byLine(No.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(No.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f1cIn = f1cFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; }} static foreach (growSize; 1 .. 
readSize + 1) {{ auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); - auto f1bIn = f1bFH.byLine(Yes.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(Yes.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); + auto f1cIn = f1cFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; }} } @@ -1068,10 +1143,10 @@ unittest string file2a = buildPath(testDir, "file2a.txt"); string file2b = buildPath(testDir, "file2b.txt"); + string file2c = buildPath(testDir, "file2c.txt"); string file3a = buildPath(testDir, "file3a.txt"); string file3b = buildPath(testDir, "file3b.txt"); - string file4a = buildPath(testDir, "file4a.txt"); - string file4b = buildPath(testDir, "file4b.txt"); + string file3c = buildPath(testDir, "file3c.txt"); { auto f1aFH = file1a.File("wb"); @@ -1083,6 +1158,11 @@ unittest f1bFH.write("a"); f1bFH.close; } + { + auto f1cFH = file1c.File("wb"); + f1cFH.write("a"); + f1cFH.close; + } { auto f2aFH = file2a.File("wb"); f2aFH.write("ab"); @@ -1093,6 +1173,11 @@ unittest f2bFH.write("ab"); f2bFH.close; } + { + auto f2cFH = file2c.File("wb"); + f2cFH.write("ab"); + f2cFH.close; + } { auto f3aFH = file3a.File("wb"); f3aFH.write("abc"); @@ -1103,6 +1188,11 @@ unittest f3bFH.write("abc"); f3bFH.close; } + { + auto f3cFH = file3c.File("wb"); + f3cFH.write("abc"); + f3cFH.close; + } static foreach (readSize; [1, 2, 4]) { @@ -1110,65 +1200,107 @@ unittest {{ auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); - auto 
f1bIn = f1bFH.byLine(No.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(No.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f1cIn = f1cFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; auto f2aFH = file2a.File(); auto f2bFH = file2b.File(); - auto f2aIn = f2aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); - auto f2bIn = f2bFH.byLine(No.keepTerminator); + auto f2cFH = file2c.File(); + auto f2aIn = f2aFH.byLine(No.keepTerminator); + auto f2bIn = f2bFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f2cIn = f2cFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f2aIn, f2bIn, f2cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f2aFH.close; f2bFH.close; + f2cFH.close; auto f3aFH = file3a.File(); auto f3bFH = file3b.File(); - auto f3aIn = f3aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); - auto f3bIn = f3bFH.byLine(No.keepTerminator); + auto f3cFH = file3c.File(); + auto f3aIn = f3aFH.byLine(No.keepTerminator); + auto f3bIn = f3bFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f3cIn = f3cFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f3aIn, f3bIn, f3cIn, StoppingPolicy.requireSameLength)) + 
{ + assert(a == b); + assert(a == c); + } f3aFH.close; f3bFH.close; + f3cFH.close; }} static foreach (growSize; 1 .. readSize + 1) {{ auto f1aFH = file1a.File(); auto f1bFH = file1b.File(); - auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); - auto f1bIn = f1bFH.byLine(Yes.keepTerminator); + auto f1cFH = file1c.File(); + auto f1aIn = f1aFH.byLine(Yes.keepTerminator); + auto f1bIn = f1bFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f1cIn = f1cFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f1aIn, f1bIn, f1cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f1aFH.close; f1bFH.close; + f1cFH.close; auto f2aFH = file2a.File(); auto f2bFH = file2b.File(); - auto f2aIn = f2aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); - auto f2bIn = f2bFH.byLine(Yes.keepTerminator); + auto f2cFH = file2c.File(); + auto f2aIn = f2aFH.byLine(Yes.keepTerminator); + auto f2bIn = f2bFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(No.lineBuffered); + auto f2cIn = f2cFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f2aIn, f2bIn, f2cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f2aFH.close; f2bFH.close; + f2cFH.close; auto f3aFH = file3a.File(); auto f3bFH = file3b.File(); - auto f3aIn = f3aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); - auto f3bIn = f3bFH.byLine(Yes.keepTerminator); + auto f3cFH = file3c.File(); + auto f3aIn = f3aFH.byLine(Yes.keepTerminator); + auto f3bIn = f3bFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, 
growSize)(No.lineBuffered); + auto f3cIn = f3cFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(Yes.lineBuffered); - foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); + foreach (a, b, c; lockstep(f3aIn, f3bIn, f3cIn, StoppingPolicy.requireSameLength)) + { + assert(a == b); + assert(a == c); + } f3aFH.close; f3bFH.close; + f3cFH.close; }} } } diff --git a/tsv-filter/src/tsv_utils/tsv-filter.d b/tsv-filter/src/tsv_utils/tsv-filter.d index d9f235d..67a28e5 100644 --- a/tsv-filter/src/tsv_utils/tsv-filter.d +++ b/tsv-filter/src/tsv_utils/tsv-filter.d @@ -80,6 +80,7 @@ Global options: --v|invert Invert the filter, printing lines that do not match. --c|count Print only a count of the matched lines. --d|delimiter CHR Field delimiter. Default: TAB. + --line-buffered Immediately output every matched line. Operators: * Test if a field is empty (no characters) or blank (empty or whitespace only). @@ -180,6 +181,8 @@ Details: ensures field 5 is numeric before running the --gt test. * Regular expression syntax is defined by the D programming language. They follow common conventions (perl, python, etc.). Most common forms work as expected. + * Output is buffered by default to improve performance. Use '--line-buffered' to + have each matched line immediately written out. Options: EOS"; @@ -739,6 +742,7 @@ struct TsvFilterOptions bool disjunct = false; /// --or bool countMatches = false; /// --c|count char delim = '\t'; /// --delimiter + bool lineBuffered = false; /// --line-buffered /* Returns a tuple. First value is true if command line arguments were successfully * processed and execution should continue, or false if an error occurred or the user @@ -857,6 +861,7 @@ struct TsvFilterOptions std.getopt.config.caseInsensitive, "c|count", " Print only a count of the matched lines, excluding the header.", &countMatches, "d|delimiter", "CHR Field delimiter. Default: TAB. 
(Single byte UTF-8 characters only.)", &delim, + "line-buffered", " Immediately output every matched line.", &lineBuffered, "empty", " True if FIELD is empty.", &handlerFldEmpty, "not-empty", " True if FIELD is not empty.", &handlerFldNotEmpty, @@ -998,23 +1003,19 @@ void tsvFilter(ref TsvFilterOptions cmdopt) import std.algorithm : all, any, splitter; import std.format : formattedWrite; import std.range; - import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, InputSourceRange, - throwIfWindowsNewline; + import tsv_utils.common.utils : BufferedOutputRange, BufferedOutputRangeDefaults, + bufferedByLine, InputSourceRange, LineBuffered, throwIfWindowsNewline; /* inputSources must be an InputSourceRange and include at least stdin. */ assert(!cmdopt.inputSources.empty); static assert(is(typeof(cmdopt.inputSources) == InputSourceRange)); /* BufferedOutputRange improves performance on narrow files with high percentages of - * writes. Want responsive output if output is rare, so ensure the first matched - * line is written, and that writes separated by long stretches of non-matched lines - * are written. + * writes. With --line-buffered, a flushSize of 1 flushes on every appendln. */ - enum maxInputLinesWithoutBufferFlush = 1024; - size_t inputLinesWithoutBufferFlush = maxInputLinesWithoutBufferFlush + 1; - - auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout); - + immutable size_t flushSize = + cmdopt.lineBuffered ? 1 : BufferedOutputRangeDefaults.flushSize; + auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize); size_t matchedLines = 0; /* First header is read during command line argument processing. Immediately @@ -1032,11 +1033,13 @@ void tsvFilter(ref TsvFilterOptions cmdopt) immutable size_t fileBodyStartLine = cmdopt.hasHeader ? 2 : 1; auto lineFields = new char[][](cmdopt.maxFieldIndex + 1); + immutable LineBuffered isLineBuffered = cmdopt.lineBuffered ?
Yes.lineBuffered : No.lineBuffered; + foreach (inputStream; cmdopt.inputSources) { if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1); - foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine)) + foreach (lineNum, line; inputStream.file.bufferedByLine(isLineBuffered).enumerate(fileBodyStartLine)) { if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum); @@ -1070,25 +1073,15 @@ void tsvFilter(ref TsvFilterOptions cmdopt) */ try { - inputLinesWithoutBufferFlush++; bool passed = cmdopt.disjunct ? cmdopt.tests.any!(x => x(lineFields)) : cmdopt.tests.all!(x => x(lineFields)); if (cmdopt.invert) passed = !passed; + if (passed) { ++matchedLines; - - if (!cmdopt.countMatches) - { - const bool wasFlushed = bufferedOutput.appendln(line); - if (wasFlushed) inputLinesWithoutBufferFlush = 0; - else if (inputLinesWithoutBufferFlush > maxInputLinesWithoutBufferFlush) - { - bufferedOutput.flush; - inputLinesWithoutBufferFlush = 0; - } - } + if (!cmdopt.countMatches) bufferedOutput.appendln(line); } } catch (Exception e) diff --git a/tsv-filter/tests/gold/basic_tests_1.txt b/tsv-filter/tests/gold/basic_tests_1.txt index 344073f..89abb70 100644 --- a/tsv-filter/tests/gold/basic_tests_1.txt +++ b/tsv-filter/tests/gold/basic_tests_1.txt @@ -207,6 +207,92 @@ F1 F2 F3 F4 ====[tsv-filter -c --not-empty 1 input1_noheader.tsv ]==== 15 +====Line buffered tests=== + +====[tsv-filter --header --line-buffered --eq 2:1 input1.tsv]==== +F1 F2 F3 F4 +1 1.0 a A + +====[tsv-filter --header --line-buffered --le 2:101 input1.tsv]==== +F1 F2 F3 F4 +1 1.0 a A +2 2. b B +10 10.1 abc ABC +100 100 abc AbC +0 0.0 z AzB +-1 -0.1 abc def abc def +-2 -2.0 ß ss +0. 100. àbc ÀBC +0.0 100.0 àßc ÀssC +-0.0 -100.0 àßc ÀSSC +100 100 AbC +100 100 abc +100 101 + +====[tsv-filter -H --line-buffered --empty F1 input1.tsv]==== +F1 F2 F3 F4 + +====[tsv-filter -H --line-buffered --not-empty F1 input1.tsv ]==== +F1 F2 F3 F4 +1 1.0 a A +2 2. 
b B +10 10.1 abc ABC +100 100 abc AbC +0 0.0 z AzB +-1 -0.1 abc def abc def +-2 -2.0 ß ss +0. 100. àbc ÀBC +0.0 100.0 àßc ÀssC +-0.0 -100.0 àßc ÀSSC +100 100 AbC +100 100 abc +100 101 +100 102 abc AbC +100 103 abc AbC + +====[tsv-filter --header --count --line-buffered --le 2:101 input1.tsv]==== +13 + +====[tsv-filter --line-buffered --eq 2:1 input1_noheader.tsv]==== +1 1.0 a A + +====[tsv-filter --line-buffered --le 2:101 input1_noheader.tsv]==== +1 1.0 a A +2 2. b B +10 10.1 abc ABC +100 100 abc AbC +0 0.0 z AzB +-1 -0.1 abc def abc def +-2 -2.0 ß ss +0. 100. àbc ÀBC +0.0 100.0 àßc ÀssC +-0.0 -100.0 àßc ÀSSC +100 100 AbC +100 100 abc +100 101 + +====[tsv-filter --line-buffered --empty 1 input1_noheader.tsv]==== + +====[tsv-filter --line-buffered --not-empty 1 input1_noheader.tsv ]==== +1 1.0 a A +2 2. b B +10 10.1 abc ABC +100 100 abc AbC +0 0.0 z AzB +-1 -0.1 abc def abc def +-2 -2.0 ß ss +0. 100. àbc ÀBC +0.0 100.0 àßc ÀssC +-0.0 -100.0 àßc ÀSSC +100 100 AbC +100 100 abc +100 101 +100 102 abc AbC +100 103 abc AbC + +====[tsv-filter --count --line-buffered --le 2:101 input1_noheader.tsv]==== +13 + ====Empty and blank field tests=== ====[tsv-filter --header --empty 3 input1.tsv]==== diff --git a/tsv-filter/tests/tests.sh b/tsv-filter/tests/tests.sh index 781a2b6..6d1363e 100755 --- a/tsv-filter/tests/tests.sh +++ b/tsv-filter/tests/tests.sh @@ -65,6 +65,20 @@ runtest ${prog} "--le 2:101 input1_noheader.tsv -c" ${basic_tests_1} runtest ${prog} "--count --empty 1 input1_noheader.tsv" ${basic_tests_1} runtest ${prog} "-c --not-empty 1 input1_noheader.tsv " ${basic_tests_1} +# Line buffered tests +echo "" >> ${basic_tests_1}; echo "====Line buffered tests===" >> ${basic_tests_1} +runtest ${prog} "--header --line-buffered --eq 2:1 input1.tsv" ${basic_tests_1} +runtest ${prog} "--header --line-buffered --le 2:101 input1.tsv" ${basic_tests_1} +runtest ${prog} "-H --line-buffered --empty F1 input1.tsv" ${basic_tests_1} +runtest ${prog} "-H --line-buffered --not-empty F1 
input1.tsv " ${basic_tests_1} +runtest ${prog} "--header --count --line-buffered --le 2:101 input1.tsv" ${basic_tests_1} + +runtest ${prog} "--line-buffered --eq 2:1 input1_noheader.tsv" ${basic_tests_1} +runtest ${prog} "--line-buffered --le 2:101 input1_noheader.tsv" ${basic_tests_1} +runtest ${prog} "--line-buffered --empty 1 input1_noheader.tsv" ${basic_tests_1} +runtest ${prog} "--line-buffered --not-empty 1 input1_noheader.tsv " ${basic_tests_1} +runtest ${prog} "--count --line-buffered --le 2:101 input1_noheader.tsv" ${basic_tests_1} + # Empty and blank field tests echo "" >> ${basic_tests_1}; echo "====Empty and blank field tests===" >> ${basic_tests_1}