Skip to content

Commit

Permalink
number-lines --line-buffered.
Browse files Browse the repository at this point in the history
  • Loading branch information
jondegenhardt committed Mar 5, 2021
1 parent 3e46d6e commit 22ed975
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 17 deletions.
2 changes: 1 addition & 1 deletion bash_completion/tsv-utils
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ _number_lines()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="--help --version --header --header-string --start-number --delimiter"
opts="--help --version --header --header-string --start-number --delimiter --line-buffered"

# Options requiring an argument or precluding other options
case $prev in
Expand Down
39 changes: 23 additions & 16 deletions number-lines/src/tsv_utils/number-lines.d
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ struct NumberLinesOptions
enum defaultHeaderString = "line";

string programName;
bool hasHeader = false; // --H|header
string headerString = ""; // --s|header-string
long startNum = 1; // --n|start-num
char delim = '\t'; // --d|delimiter
bool versionWanted = false; // --V|version
bool hasHeader = false; /// --H|header
string headerString = ""; /// --s|header-string
long startNum = 1; /// --n|start-num
char delim = '\t'; /// --d|delimiter
bool lineBuffered = false; /// --line-buffered
bool versionWanted = false; /// --V|version

/* Returns a tuple. First value is true if command line arguments were successfully
* processed and execution should continue, or false if an error occurred or the user
Expand All @@ -67,6 +68,7 @@ struct NumberLinesOptions
"s|header-string", "STR String to use in the header row. Implies --header. Default: 'line'", &headerString,
"n|start-number", "NUM Number to use for the first line. Default: 1", &startNum,
"d|delimiter", "CHR Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
"line-buffered", " Immediately output every line.", &lineBuffered,
std.getopt.config.caseSensitive,
"V|version", " Print version information and exit.", &versionWanted,
std.getopt.config.caseInsensitive,
Expand Down Expand Up @@ -126,21 +128,34 @@ int main(string[] cmdArgs)
*
* Reads lines from each file, outputting each with a line number prepended. The
* header from the first file is written, the header from subsequent files is dropped.
*
* Note: Unlike most other tsv-utils tools, number-lines does not immediately flush
* the header line. This is because it directly uses bufferedByLine, which does not
* support reading the header line independently of the rest of the buffer.
*/
void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
{
import std.conv : to;
import std.range;
import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange;
import tsv_utils.common.utils : BufferedOutputRange, BufferedOutputRangeDefaults,
bufferedByLine, LineBuffered;

auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout);
immutable size_t flushSize = cmdopt.lineBuffered ?
BufferedOutputRangeDefaults.lineBufferedFlushSize :
BufferedOutputRangeDefaults.flushSize;
auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize);

long lineNum = cmdopt.startNum;
bool headerWritten = false;
immutable LineBuffered isLineBuffered = cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered;

foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"])
{
auto inputStream = (filename == "-") ? stdin : filename.File();
foreach (fileLineNum, line; inputStream.bufferedByLine!(KeepTerminator.no).enumerate(1))
foreach (fileLineNum, line;
inputStream
.bufferedByLine!(KeepTerminator.no)(isLineBuffered)
.enumerate(1))
{
if (cmdopt.hasHeader && fileLineNum == 1)
{
Expand All @@ -150,14 +165,6 @@ void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
bufferedOutput.append(cmdopt.delim);
bufferedOutput.appendln(line);
headerWritten = true;

/* Flush the header immediately. This helps tasks further on in a
* unix pipeline detect errors quickly, without waiting for all
* the data to flow through the pipeline. Note that an upstream
* task may have flushed its header line, so the header may
* arrive long before the main block of data.
*/
bufferedOutput.flush;
}
}
else
Expand Down
43 changes: 43 additions & 0 deletions number-lines/tests/gold/basic_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,49 @@ line The first line
8
9 Previous line a single tab

====line buffered tests===

====[number-lines --line-buffered input1.txt]====
1 The first line - Is it a header?
2 abc def ghi
3 some random text
4 Japanese: 私はガラスを食べられます。それは私を傷つけません。
5
6 Previous line blank
7
8 Previous line a single tab

====[number-lines empty-file.txt]====

====[number-lines -H empty-file.txt]====

====[number-lines --line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt]====
1 The first line - Is it a header?
2 abc def ghi
3 some random text
4 Japanese: 私はガラスを食べられます。それは私を傷つけません。
5
6 Previous line blank
7
8 Previous line a single tab
9 The first line
10 The second line
11 The third line
12 The one line

====[cat input1.txt input2.txt | number-lines --header --line-buffered]====
line The first line - Is it a header?
1 abc def ghi
2 some random text
3 Japanese: 私はガラスを食べられます。それは私を傷つけません。
4
5 Previous line blank
6
7 Previous line a single tab
8 The first line
9 The second line
10 The third line

Help and Version printing 1
-----------------

Expand Down
10 changes: 10 additions & 0 deletions number-lines/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ cat input1.txt | ${prog} -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2
echo "" >> ${basic_tests_1}; echo "====[cat input1.txt | number-lines --header -- input2.txt - one-line-file.txt]====" >> ${basic_tests_1}
cat input1.txt | ${prog} --header -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2>&1

## --line-buffered tests
echo "" >> ${basic_tests_1}; echo "====line buffered tests===" >> ${basic_tests_1}
runtest ${prog} "--line-buffered input1.txt" ${basic_tests_1}
runtest ${prog} "empty-file.txt" ${basic_tests_1}
runtest ${prog} "-H empty-file.txt" ${basic_tests_1}
runtest ${prog} "--line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt" ${basic_tests_1}

echo "" >> ${basic_tests_1}; echo "====[cat input1.txt input2.txt | number-lines --header --line-buffered]====" >> ${basic_tests_1}
cat input1.txt input2.txt | ${prog} --header --line-buffered >> ${basic_tests_1} 2>&1

## Help and Version printing

echo "" >> ${basic_tests_1}
Expand Down

0 comments on commit 22ed975

Please sign in to comment.