Skip to content

Commit

Permalink
csv2tsv newline replacement (#303)
Browse files (browse the repository at this point in the history)
* csv2tsv: Separate command line arguments for TAB and Newline replacement characters.

* Update test result files for parameter name change.

* Undo errant character addition.
  • Loading branch information
jondegenhardt authored Sep 8, 2020
1 parent 615914f commit c439745
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 27 deletions.
4 changes: 2 additions & 2 deletions bash_completion/tsv-utils
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ _csv2tsv()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="--help --help-verbose --version --header --quote --csv-delim --tsv-delim --replacement"
opts="--help --help-verbose --version --header --quote --csv-delim --tsv-delim --tab-replacement --newline-replacement"

# Options requiring an argument or precluding other options
case $prev in
-h|--help|--help-verbose|-V|--version|-q|--quote|-c|--csv-delim|-t|--tsv-delim|-r|--replacement)
-h|--help|--help-verbose|-V|--version|-q|--quote|-c|--csv-delim|-t|--tsv-delim|-r|--tab-replacement|-n|--newline-replacement)
return
;;
esac
Expand Down
25 changes: 15 additions & 10 deletions csv2tsv/src/tsv_utils/csv2tsv.d
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ struct Csv2tsvOptions
char csvQuoteChar = '"'; // --q|quote
char csvDelimChar = ','; // --c|csv-delim
char tsvDelimChar = '\t'; // --t|tsv-delim
string tsvDelimReplacement = " "; // --r|replacement
string tsvDelimReplacement = " "; // --r|tab-replacement
string newlineReplacement = " "; // --n|newline-replacement
bool versionWanted = false; // --V|version

auto processArgs (ref string[] cmdArgs)
Expand All @@ -94,16 +95,17 @@ struct Csv2tsvOptions
{
auto r = getopt(
cmdArgs,
"help-verbose", " Print full help.", &helpVerbose,
"help-verbose", " Print full help.", &helpVerbose,
std.getopt.config.caseSensitive,
"H|header", " Treat the first line of each file as a header. Only the header of the first file is output.", &hasHeader,
"H|header", " Treat the first line of each file as a header. Only the header of the first file is output.", &hasHeader,
std.getopt.config.caseSensitive,
"q|quote", "CHR Quoting character in CSV data. Default: double-quote (\")", &csvQuoteChar,
"c|csv-delim", "CHR Field delimiter in CSV data. Default: comma (,).", &csvDelimChar,
"t|tsv-delim", "CHR Field delimiter in TSV data. Default: TAB", &tsvDelimChar,
"r|replacement", "STR Replacement for newline and TSV field delimiters found in CSV input. Default: Space.", &tsvDelimReplacement,
"q|quote", "CHR Quoting character in CSV data. Default: double-quote (\")", &csvQuoteChar,
"c|csv-delim", "CHR Field delimiter in CSV data. Default: comma (,).", &csvDelimChar,
"t|tsv-delim", "CHR Field delimiter in TSV data. Default: TAB", &tsvDelimChar,
"r|tab-replacement", "STR Replacement for TSV field delimiters (typically TABs) found in CSV input. Default: Space.", &tsvDelimReplacement,
"n|newline-replacement", "STR Replacement for newlines found in CSV input. Default: Space.", &newlineReplacement,
std.getopt.config.caseSensitive,
"V|version", " Print version information and exit.", &versionWanted,
"V|version", " Print version information and exit.", &versionWanted,
std.getopt.config.caseInsensitive,
);

Expand Down Expand Up @@ -141,7 +143,10 @@ struct Csv2tsvOptions
"TSV field delimiter cannot be newline (--t|tsv-delim).");

enforce(!canFind!(c => (c == '\n' || c == '\r' || c == tsvDelimChar))(tsvDelimReplacement),
"Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).");
"Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).");

enforce(!canFind!(c => (c == '\n' || c == '\r' || c == tsvDelimChar))(newlineReplacement),
"Replacement character cannot contain newlines or TSV field delimiters (--n|newline-replacement).");
}
catch (Exception exc)
{
Expand Down Expand Up @@ -208,7 +213,7 @@ void csv2tsvFiles(const ref Csv2tsvOptions cmdopt, const string[] inputFiles)
csv2tsv(inputStream, stdoutWriter, fileRawBuf, printFileName, skipLines,
cmdopt.csvQuoteChar, cmdopt.csvDelimChar,
cmdopt.tsvDelimChar, cmdopt.tsvDelimReplacement,
cmdopt.tsvDelimReplacement);
cmdopt.newlineReplacement);

firstFile = false;
}
Expand Down
49 changes: 47 additions & 2 deletions csv2tsv/tests/gold/basic_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,65 @@ QCQ QNQ CNQACAQ _Q_NCCQCQ
A AA AAA AAAA
a ab abc abcd

====[csv2tsv --quote # --csv-delim | --tsv-delim $ --replacement <==> input2.csv]====
====[csv2tsv --quote # --csv-delim | --tsv-delim $ --tab-replacement <==> --newline-replacement <==> input2.csv]====
field1$field2$field3
123$456$789
234$567$890
|abc$#def#$gh><==>ijk><==>lmn<
ABC$DEF$GHI

====[csv2tsv -q # -c | -t @ -r <--> input2.csv]====
====[csv2tsv -q # -c | -t @ -r <--> -n <--> input2.csv]====
field1@field2@field3
123@456@789
234@567@890
|abc@#def#@gh><-->ijk><-->lmn<
ABC@DEF@GHI

====[csv2tsv input3.csv]====
Type Value1 Value2
Vanilla ABC 123
Quoted ABC 123
With Comma abc,def 123,4
With Quotes Say "Hello World!" 10" high
With Newline Value 1 Line 1 Value 1 Line 2 Value 2 Line 1 Value 2 Line 2
With TAB ABC DEF 123 456

====[csv2tsv --tab-replacement <TAB> input3.csv]====
Type Value1 Value2
Vanilla ABC 123
Quoted ABC 123
With Comma abc,def 123,4
With Quotes Say "Hello World!" 10" high
With Newline Value 1 Line 1 Value 1 Line 2 Value 2 Line 1 Value 2 Line 2
With TAB ABC<TAB>DEF 123<TAB>456

====[csv2tsv --newline-replacement <NL> input3.csv]====
Type Value1 Value2
Vanilla ABC 123
Quoted ABC 123
With Comma abc,def 123,4
With Quotes Say "Hello World!" 10" high
With Newline Value 1 Line 1<NL>Value 1 Line 2 Value 2 Line 1<NL>Value 2 Line 2
With TAB ABC DEF 123 456

====[csv2tsv -r <TAB> -n <NL> input3.csv]====
Type Value1 Value2
Vanilla ABC 123
Quoted ABC 123
With Comma abc,def 123,4
With Quotes Say "Hello World!" 10" high
With Newline Value 1 Line 1<NL>Value 1 Line 2 Value 2 Line 1<NL>Value 2 Line 2
With TAB ABC<TAB>DEF 123<TAB>456

====[csv2tsv -r ␉ -n ␤ input3.csv]====
Type Value1 Value2
Vanilla ABC 123
Quoted ABC 123
With Comma abc,def 123,4
With Quotes Say "Hello World!" 10" high
With Newline Value 1 Line 1␤Value 1 Line 2 Value 2 Line 1␤Value 2 Line 2
With TAB ABC␉DEF 123␉456

====[csv2tsv header1.csv header2.csv header3.csv header4.csv header5.csv]====
field1 field2 field3
123 456 789
Expand Down
26 changes: 19 additions & 7 deletions csv2tsv/tests/gold/error_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,29 @@ Error [csv2tsv]: Cannot open file `nosuchfile.txt' in mode `rb' (No such file or
====[csv2tsv --tsv-delim $'\r' input2.csv]====
[csv2tsv] Error processing command line arguments: TSV field delimiter cannot be newline (--t|tsv-delim).

====[csv2tsv --replacement $'\n' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).
====[csv2tsv --tab-replacement $'\n' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).

====[csv2tsv --replacement $'\r' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).
====[csv2tsv --tab-replacement $'\r' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).

====[csv2tsv -r $'__\n__' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).

====[csv2tsv -r $'__\r__' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).

====[csv2tsv --newline-replacement $'\n' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--n|newline-replacement).

====[csv2tsv --newline-replacement $'\r' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--n|newline-replacement).

====[csv2tsv -n $'__\n__' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--n|newline-replacement).

====[csv2tsv -n $'__\r__' input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--n|newline-replacement).

====[csv2tsv -q x -c x input2.csv]====
[csv2tsv] Error processing command line arguments: CSV quote and CSV field delimiter characters must be different (--q|quote, --c|csv-delim).
Expand All @@ -45,7 +57,7 @@ Error [csv2tsv]: Cannot open file `nosuchfile.txt' in mode `rb' (No such file or
[csv2tsv] Error processing command line arguments: CSV quote and TSV field delimiter characters must be different (--q|quote, --t|tsv-delim).

====[csv2tsv -t x -r wxyz input2.csv]====
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|replacement).
[csv2tsv] Error processing command line arguments: Replacement character cannot contain newlines or TSV field delimiters (--r|tab-replacement).

====[csv2tsv invalid1.csv]====
Error [csv2tsv]: Invalid CSV. Improperly terminated quoted field. File: invalid1.csv, Line: 3
Expand Down
9 changes: 9 additions & 0 deletions csv2tsv/tests/input3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Type,Value1,Value2
Vanilla,ABC,123
Quoted,"ABC","123"
With Comma,"abc,def","123,4"
With Quotes,"Say ""Hello World!""","10"" high"
With Newline,"Value 1 Line 1
Value 1 Line 2","Value 2 Line 1
Value 2 Line 2"
With TAB,"ABC DEF","123 456"
28 changes: 22 additions & 6 deletions csv2tsv/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ echo "-----------------" >> ${basic_tests_1}
runtest ${prog} "input1_format1.csv" ${basic_tests_1}
runtest ${prog} "input1_format2.csv" ${basic_tests_1}
runtest ${prog} "input1_format3.csv" ${basic_tests_1}
runtest ${prog} "--quote # --csv-delim | --tsv-delim $ --replacement <==> input2.csv" ${basic_tests_1}
runtest ${prog} "-q # -c | -t @ -r <--> input2.csv" ${basic_tests_1}
runtest ${prog} "--quote # --csv-delim | --tsv-delim $ --tab-replacement <==> --newline-replacement <==> input2.csv" ${basic_tests_1}
runtest ${prog} "-q # -c | -t @ -r <--> -n <--> input2.csv" ${basic_tests_1}
runtest ${prog} "input3.csv" ${basic_tests_1}
runtest ${prog} "--tab-replacement <TAB> input3.csv" ${basic_tests_1}
runtest ${prog} "--newline-replacement <NL> input3.csv" ${basic_tests_1}
runtest ${prog} "-r <TAB> -n <NL> input3.csv" ${basic_tests_1}
runtest ${prog} "-r ␉ -n ␤ input3.csv" ${basic_tests_1}
runtest ${prog} "header1.csv header2.csv header3.csv header4.csv header5.csv" ${basic_tests_1}
runtest ${prog} "--header header1.csv header2.csv header3.csv header4.csv header5.csv" ${basic_tests_1}
runtest ${prog} "-H header1.csv header2.csv header3.csv header4.csv header5.csv" ${basic_tests_1}
Expand Down Expand Up @@ -92,18 +97,29 @@ ${prog} --tsv-delim $'\n' input2.csv >> ${error_tests_1} 2>&1
echo "" >> ${error_tests_1}; echo "====[csv2tsv --tsv-delim $'\r' input2.csv]====" >> ${error_tests_1}
${prog} --tsv-delim $'\r' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv --replacement $'\n' input2.csv]====" >> ${error_tests_1}
${prog} --replacement $'\n' input2.csv >> ${error_tests_1} 2>&1
echo "" >> ${error_tests_1}; echo "====[csv2tsv --tab-replacement $'\n' input2.csv]====" >> ${error_tests_1}
${prog} --tab-replacement $'\n' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv --replacement $'\r' input2.csv]====" >> ${error_tests_1}
${prog} --replacement $'\r' input2.csv >> ${error_tests_1} 2>&1
echo "" >> ${error_tests_1}; echo "====[csv2tsv --tab-replacement $'\r' input2.csv]====" >> ${error_tests_1}
${prog} --tab-replacement $'\r' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv -r $'__\n__' input2.csv]====" >> ${error_tests_1}
${prog} -r $'__\n__' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv -r $'__\r__' input2.csv]====" >> ${error_tests_1}
${prog} -r $'__\r__' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv --newline-replacement $'\n' input2.csv]====" >> ${error_tests_1}
${prog} --newline-replacement $'\n' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv --newline-replacement $'\r' input2.csv]====" >> ${error_tests_1}
${prog} --newline-replacement $'\r' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv -n $'__\n__' input2.csv]====" >> ${error_tests_1}
${prog} -n $'__\n__' input2.csv >> ${error_tests_1} 2>&1

echo "" >> ${error_tests_1}; echo "====[csv2tsv -n $'__\r__' input2.csv]====" >> ${error_tests_1}
${prog} -n $'__\r__' input2.csv >> ${error_tests_1} 2>&1

runtest ${prog} "-q x -c x input2.csv" ${error_tests_1}
runtest ${prog} "-q x -t x input2.csv" ${error_tests_1}
Expand Down

0 comments on commit c439745

Please sign in to comment.