Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

number-lines --line-buffered #335

Merged
merged 2 commits into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bash_completion/tsv-utils
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ _number_lines()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="--help --version --header --header-string --start-number --delimiter"
opts="--help --version --header --header-string --start-number --delimiter --line-buffered"

# Options requiring an argument or precluding other options
case $prev in
Expand Down
39 changes: 23 additions & 16 deletions number-lines/src/tsv_utils/number-lines.d
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ struct NumberLinesOptions
enum defaultHeaderString = "line";

string programName;
bool hasHeader = false; // --H|header
string headerString = ""; // --s|header-string
long startNum = 1; // --n|start-num
char delim = '\t'; // --d|delimiter
bool versionWanted = false; // --V|version
bool hasHeader = false; /// --H|header
string headerString = ""; /// --s|header-string
long startNum = 1; /// --n|start-num
char delim = '\t'; /// --d|delimiter
bool lineBuffered = false; /// --line-buffered
bool versionWanted = false; /// --V|version

/* Returns a tuple. First value is true if command line arguments were successfully
* processed and execution should continue, or false if an error occurred or the user
Expand All @@ -67,6 +68,7 @@ struct NumberLinesOptions
"s|header-string", "STR String to use in the header row. Implies --header. Default: 'line'", &headerString,
"n|start-number", "NUM Number to use for the first line. Default: 1", &startNum,
"d|delimiter", "CHR Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
"line-buffered", " Immediately output every line.", &lineBuffered,
std.getopt.config.caseSensitive,
"V|version", " Print version information and exit.", &versionWanted,
std.getopt.config.caseInsensitive,
Expand Down Expand Up @@ -126,21 +128,34 @@ int main(string[] cmdArgs)
*
* Reads lines from each file, outputting each with a line number prepended. The
* header from the first file is written, the header from subsequent files is dropped.
*
* Note: number-lines does not immediately flush the header line like most other
* tsv-utils tools. This is because it directly uses bufferedByLine, which does not
* support reading the header line independently of the rest of the buffer.
*/
void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
{
import std.conv : to;
import std.range;
import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange;
import tsv_utils.common.utils : BufferedOutputRange, BufferedOutputRangeDefaults,
bufferedByLine, LineBuffered;

auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout);
immutable size_t flushSize = cmdopt.lineBuffered ?
BufferedOutputRangeDefaults.lineBufferedFlushSize :
BufferedOutputRangeDefaults.flushSize;
auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, flushSize);

long lineNum = cmdopt.startNum;
bool headerWritten = false;
immutable LineBuffered isLineBuffered = cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered;

foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"])
{
auto inputStream = (filename == "-") ? stdin : filename.File();
foreach (fileLineNum, line; inputStream.bufferedByLine!(KeepTerminator.no).enumerate(1))
foreach (fileLineNum, line;
inputStream
.bufferedByLine!(KeepTerminator.no)(isLineBuffered)
.enumerate(1))
{
if (cmdopt.hasHeader && fileLineNum == 1)
{
Expand All @@ -150,14 +165,6 @@ void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
bufferedOutput.append(cmdopt.delim);
bufferedOutput.appendln(line);
headerWritten = true;

/* Flush the header immediately. This helps tasks further on in a
* unix pipeline detect errors quickly, without waiting for all
* the data to flow through the pipeline. Note that an upstream
* task may have flushed its header line, so the header may
* arrive long before the main block of data.
*/
bufferedOutput.flush;
}
}
else
Expand Down
43 changes: 43 additions & 0 deletions number-lines/tests/gold/basic_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,49 @@ line The first line
8
9 Previous line a single tab

====line buffered tests===

====[number-lines --line-buffered input1.txt]====
1 The first line - Is it a header?
2 abc def ghi
3 some random text
4 Japanese: 私はガラスを食べられます。それは私を傷つけません。
5
6 Previous line blank
7
8 Previous line a single tab

====[number-lines empty-file.txt]====

====[number-lines -H empty-file.txt]====

====[number-lines --line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt]====
1 The first line - Is it a header?
2 abc def ghi
3 some random text
4 Japanese: 私はガラスを食べられます。それは私を傷つけません。
5
6 Previous line blank
7
8 Previous line a single tab
9 The first line
10 The second line
11 The third line
12 The one line

====[cat input1.txt input2.txt | number-lines --header --line-buffered]====
line The first line - Is it a header?
1 abc def ghi
2 some random text
3 Japanese: 私はガラスを食べられます。それは私を傷つけません。
4
5 Previous line blank
6
7 Previous line a single tab
8 The first line
9 The second line
10 The third line

Help and Version printing 1
-----------------

Expand Down
10 changes: 10 additions & 0 deletions number-lines/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ cat input1.txt | ${prog} -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2
echo "" >> ${basic_tests_1}; echo "====[cat input1.txt | number-lines --header -- input2.txt - one-line-file.txt]====" >> ${basic_tests_1}
cat input1.txt | ${prog} --header -- input2.txt - one-line-file.txt >> ${basic_tests_1} 2>&1

## --line-buffered tests
echo "" >> ${basic_tests_1}; echo "====line buffered tests===" >> ${basic_tests_1}
runtest ${prog} "--line-buffered input1.txt" ${basic_tests_1}
runtest ${prog} "empty-file.txt" ${basic_tests_1}
runtest ${prog} "-H empty-file.txt" ${basic_tests_1}
runtest ${prog} "--line-buffered input1.txt input2.txt empty-file.txt one-line-file.txt" ${basic_tests_1}

echo "" >> ${basic_tests_1}; echo "====[cat input1.txt input2.txt | number-lines --header --line-buffered]====" >> ${basic_tests_1}
cat input1.txt input2.txt | ${prog} --header --line-buffered >> ${basic_tests_1} 2>&1

## Help and Version printing

echo "" >> ${basic_tests_1}
Expand Down
50 changes: 50 additions & 0 deletions tsv-select/tests/gold/basic_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,56 @@ r1c20 r1c19 r1c18 r1c17 r1c16 r1c15 r1c14 r1c13 r1c12 r1c5 r1c2
濡れ羽色 ab*56 ab.56 ab,56 ab/56 ab\56 ab-56 ab 56 ab:56 56*7 56.7 56,7 56/7 56\7 56-7 56 7 56:7 01 1 ab*c ab.c ab,c ab/c ab\c ab-c ab c ab:c
r1c26 r1c25 r1c24 r1c23 r1c22 r1c21 r1c20 r1c19 r1c18 r1c17 r1c16 r1c15 r1c14 r1c13 r1c12 r1c11 r1c10 r1c9 r1c8 r1c7 r1c6 r1c5 r1c4 r1c3 r1c2 r1c1 r1c0

line-buffered tests
-------------------

====[tsv-select --line-buffered --fields 1 input1.tsv]====
f1
1

3
4
5
6
7
8

====[tsv-select --line-buffered -e 2,3,5 -f 4,1 input1.tsv]====
f4 f1
101 1
5734
7 3
4
1367 5
f23-empty 6
f23-space 7
1931 8

====[tsv-select --line-buffered -f 1 --delimiter ^ input_2plus_hat_delim.tsv]====
f1
abc


123


====[tsv-select --line-buffered -f 1 input_emptyfile.tsv]====

====[tsv-select --line-buffered -H -f 1 input_emptyfile.tsv]====

====[tsv-select --line-buffered --header -f 1 input_header1.tsv]====
field1
11567
21567

====[tsv-select --line-buffered -H -f field2 input_header1.tsv input_header2.tsv input_header3.tsv input_header4.tsv]====
field2
12567
22567
12987
12888
22888

Help and Version printing 1
-----------------

Expand Down
11 changes: 11 additions & 0 deletions tsv-select/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,17 @@ ${prog} -H input_header_variants.tsv -f 'ab\-56-56\-7,ab\,c,ab\-c' >> ${basic_te
echo "" >> ${basic_tests_1}; echo "====[tsv-select -H input_header_variants.tsv -f '濡れ羽色-ab\*56,ab.56-ab\,56,ab/56-ab\\56,ab\-56-ab\ 56,ab\:56-56\*7,56.7-56\,7,56/7-56\\7,56\-7-56\ 7,56\:7-\01,\1-ab\*c,ab\.c-ab\,c,ab/c-ab\\c,ab\-c-ab\ c,ab\:c']===" >> ${basic_tests_1}
${prog} -H input_header_variants.tsv -f '濡れ羽色-ab\*56,ab.56-ab\,56,ab/56-ab\\56,ab\-56-ab\ 56,ab\:56-56\*7,56.7-56\,7,56/7-56\\7,56\-7-56\ 7,56\:7-\01,\1-ab\*c,ab\.c-ab\,c,ab/c-ab\\c,ab\-c-ab\ c,ab\:c' >> ${basic_tests_1} 2>&1

echo "" >> ${basic_tests_1}
echo "line-buffered tests" >> ${basic_tests_1}
echo "-------------------" >> ${basic_tests_1}
runtest ${prog} "--line-buffered --fields 1 input1.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered -e 2,3,5 -f 4,1 input1.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered -f 1 --delimiter ^ input_2plus_hat_delim.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered -f 1 input_emptyfile.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered -H -f 1 input_emptyfile.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered --header -f 1 input_header1.tsv" ${basic_tests_1}
runtest ${prog} "--line-buffered -H -f field2 input_header1.tsv input_header2.tsv input_header3.tsv input_header4.tsv" ${basic_tests_1}

## Help and Version printing

echo "" >> ${basic_tests_1}
Expand Down