From 22ed975bc1c00423a261bd2e8978772ecce13e3f Mon Sep 17 00:00:00 2001 From: Jon Degenhardt Date: Thu, 4 Mar 2021 23:55:58 -0800 Subject: [PATCH] number-lines --line-buffered. --- bash_completion/tsv-utils | 2 +- number-lines/src/tsv_utils/number-lines.d | 39 +++++++++++--------- number-lines/tests/gold/basic_tests_1.txt | 43 +++++++++++++++++++++++ number-lines/tests/tests.sh | 10 ++++++ 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/bash_completion/tsv-utils b/bash_completion/tsv-utils index 697def0f..5ee2a1c3 100644 --- a/bash_completion/tsv-utils +++ b/bash_completion/tsv-utils @@ -48,7 +48,7 @@ _number_lines() COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" - opts="--help --version --header --header-string --start-number --delimiter" + opts="--help --version --header --header-string --start-number --delimiter --line-buffered" # Options requiring an argument or precluding other options case $prev in diff --git a/number-lines/src/tsv_utils/number-lines.d b/number-lines/src/tsv_utils/number-lines.d index 0fab451b..d5ad023c 100644 --- a/number-lines/src/tsv_utils/number-lines.d +++ b/number-lines/src/tsv_utils/number-lines.d @@ -39,11 +39,12 @@ struct NumberLinesOptions enum defaultHeaderString = "line"; string programName; - bool hasHeader = false; // --H|header - string headerString = ""; // --s|header-string - long startNum = 1; // --n|start-num - char delim = '\t'; // --d|delimiter - bool versionWanted = false; // --V|version + bool hasHeader = false; /// --H|header + string headerString = ""; /// --s|header-string + long startNum = 1; /// --n|start-num + char delim = '\t'; /// --d|delimiter + bool lineBuffered = false; /// --line-buffered + bool versionWanted = false; /// --V|version /* Returns a tuple. First value is true if command line arguments were successfully * processed and execution should continue, or false if an error occurred or the user @@ -67,6 +68,7 @@ struct NumberLinesOptions "s|header-string", "STR String to use in the header row. Implies --header. Default: 'line'", &headerString, "n|start-number", "NUM Number to use for the first line. Default: 1", &startNum, "d|delimiter", "CHR Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim, + "line-buffered", " Immediately output every line.", &lineBuffered, std.getopt.config.caseSensitive, "V|version", " Print version information and exit.", &versionWanted, std.getopt.config.caseInsensitive, @@ -126,21 +128,34 @@ int main(string[] cmdArgs) * * Reads lines lines from each file, outputing each with a line number prepended. The * header from the first file is written, the header from subsequent files is dropped. + * + * Note: number-lines does not immediately flush the header line like most other + * tsv-utils tools. This is because it directly uses bufferedByLine, which does not + * support reading the header line independently of the rest of the buffer. */ void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles) { import std.conv : to; import std.range; - import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange; + import tsv_utils.common.utils : BufferedOutputRange, BufferedOutputRangeDefaults, + bufferedByLine, LineBuffered; - auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout); + immutable size_t flushSize = cmdopt.lineBuffered ? + BufferedOutputRangeDefaults.lineBufferedFlushSize : + BufferedOutputRangeDefaults.flushSize; + auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize); long lineNum = cmdopt.startNum; bool headerWritten = false; + immutable LineBuffered isLineBuffered = cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered; + foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"]) { auto inputStream = (filename == "-") ? stdin : filename.File(); - foreach (fileLineNum, line; inputStream.bufferedByLine!(KeepTerminator.no).enumerate(1)) + foreach (fileLineNum, line; + inputStream + .bufferedByLine!(KeepTerminator.no)(isLineBuffered) + .enumerate(1)) { if (cmdopt.hasHeader && fileLineNum == 1) { @@ -150,14 +165,6 @@ void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles) bufferedOutput.append(cmdopt.delim); bufferedOutput.appendln(line); headerWritten = true; - - /* Flush the header immediately. This helps tasks further on in a - * unix pipeline detect errors quickly, without waiting for all - * the data to flow through the pipeline. Note that an upstream - * task may have flushed its header line, so the header may - * arrive long before the main block of data. - */ - bufferedOutput.flush; } } else diff --git a/number-lines/tests/gold/basic_tests_1.txt b/number-lines/tests/gold/basic_tests_1.txt index 3fdf8ab0..7c7d2747 100644 --- a/number-lines/tests/gold/basic_tests_1.txt +++ b/number-lines/tests/gold/basic_tests_1.txt @@ -295,6 +295,49 @@ line The first line 8 9 Previous line a single tab +====line buffered tests=== + +====[number-lines --line-buffered input1.txt]==== +1 The first line - Is it a header? +2 abc def ghi +3 some random text +4 Japanese: 私はガラスを食べられます。それは私を傷つけません。 +5 +6 Previous line blank +7 +8 Previous line a single tab + +====[number-lines empty-file.txt]==== + +====[number-lines -H empty-file.txt]==== + +====[number-lines --line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt]==== +1 The first line - Is it a header? +2 abc def ghi +3 some random text +4 Japanese: 私はガラスを食べられます。それは私を傷つけません。 +5 +6 Previous line blank +7 +8 Previous line a single tab +9 The first line +10 The second line +11 The third line +12 The one line + +====[cat input1.txt input2.txt | number-lines --header --line-buffered]==== +line The first line - Is it a header? +1 abc def ghi +2 some random text +3 Japanese: 私はガラスを食べられます。それは私を傷つけません。 +4 +5 Previous line blank +6 +7 Previous line a single tab +8 The first line +9 The second line +10 The third line + Help and Version printing 1 ----------------- diff --git a/number-lines/tests/tests.sh b/number-lines/tests/tests.sh index 566dabc1..50e7d113 100755 --- a/number-lines/tests/tests.sh +++ b/number-lines/tests/tests.sh @@ -67,6 +67,16 @@ cat input1.txt | ${prog} -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2 echo "" >> ${basic_tests_1}; echo "====[cat input1.txt | number-lines --header -- input2.txt - one-line-file.txt]====" >> ${basic_tests_1} cat input1.txt | ${prog} --header -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2>&1 +## --line-buffered tests +echo "" >> ${basic_tests_1}; echo "====line buffered tests===" >> ${basic_tests_1} +runtest ${prog} "--line-buffered input1.txt" ${basic_tests_1} +runtest ${prog} "empty-file.txt" ${basic_tests_1} +runtest ${prog} "-H empty-file.txt" ${basic_tests_1} +runtest ${prog} "--line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt" ${basic_tests_1} + +echo "" >> ${basic_tests_1}; echo "====[cat input1.txt input2.txt | number-lines --header --line-buffered]====" >> ${basic_tests_1} +cat input1.txt input2.txt | ${prog} --header --line-buffered >> ${basic_tests_1} 2>&1 + ## Help and Version printing echo "" >> ${basic_tests_1}