Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New tool: tsv-split #270

Merged
merged 23 commits into from
Mar 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
dd189bf
[WIP] tsv-split: Initial code.
jondegenhardt Mar 6, 2020
7994c2f
[WIP] tsv-split: file names with consistent numberic widths.
jondegenhardt Mar 6, 2020
b31540c
[WIP] tsv-split: Misc code changes.
jondegenhardt Mar 6, 2020
e0860b6
[WIP] tsv-split: Add directory command-line arg.
jondegenhardt Mar 7, 2020
5db4f45
[WIP] tsv-split: standalone rlimit cover.
jondegenhardt Mar 7, 2020
a2b7610
[WIP] tsv-split: --max-open-files option.
jondegenhardt Mar 7, 2020
5dfa83d
[WIP] tsv-split help.
jondegenhardt Mar 7, 2020
4b44f37
[WIP] tsv-split --lines-per-file.
jondegenhardt Mar 7, 2020
0222a48
[WIP] tsv-split unit tests.
jondegenhardt Mar 15, 2020
ab0355a
[WIP] tsv-split unit test updates.
jondegenhardt Mar 15, 2020
51670fb
[WIP] tsv-split unit tests.
jondegenhardt Mar 16, 2020
10ea3b2
[WIP] tsv-split unit tests.
jondegenhardt Mar 16, 2020
87f3aa5
[WIP] tsv-split dub build.
jondegenhardt Mar 16, 2020
c07a87a
[WIP] tsv-split: codecov files need to output to test directory.
jondegenhardt Mar 16, 2020
547530c
[WIP] tsv-split: Additional unit test cases.
jondegenhardt Mar 16, 2020
f2e6d9a
Update shebang lines on all tests.sh files.
jondegenhardt Mar 16, 2020
ccd1b38
Spell correction in test routine error messages.
jondegenhardt Mar 16, 2020
5b93597
[WIP] tsv-split help documentation.
jondegenhardt Mar 17, 2020
c2d1eb7
[WIP] tsv-split help doc update.
jondegenhardt Mar 17, 2020
f9ede20
[WIP] tsv-split bash completion.
jondegenhardt Mar 17, 2020
b8e9bd2
tsv-split help edits.
jondegenhardt Mar 17, 2020
70e304e
[WIP] tsv-split: setup for PGO. Cannot use yet, LDC issue.
jondegenhardt Mar 18, 2020
b42d329
tsv-split: Add a simple buffering mechanism when splitting by line co…
jondegenhardt Mar 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions bash_completion/tsv-utils
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,31 @@ _tsv_select()
}
complete -F _tsv_select tsv-select

# Bash programmable-completion handler for tsv-split.
#
# Inputs:  COMP_WORDS / COMP_CWORD, as provided by bash when completing.
# Outputs: COMPREPLY, filled with
#   - nothing, when the previous word is an option that takes a value or that
#     precludes other options (e.g. --help, --version);
#   - matching long option names, when the current word starts with '-';
#   - matching file and directory names otherwise.
_tsv_split()
{
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="--help --help-verbose --version --header --header-in-only --lines-per-file --num-files --key-fields --dir --prefix --suffix --append --static-seed --seed-value --delimiter --max-open-files"

    # Options requiring an argument or precluding other options
    case "$prev" in
        -h|--help|--help-verbose|-V|--version|-l|--lines-per-file|-n|--num-files|-k|--key-fields|--dir|--prefix|--suffix|-v|--seed-value|-d|--delimiter|--max-open-files)
            return
            ;;
    esac

    if [[ ${cur} == -* ]] ; then
        # The command substitution is deliberately unquoted so each candidate
        # becomes its own array element; the word being completed is quoted so
        # it is passed to compgen verbatim (no word splitting or globbing).
        COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
        return 0
    fi

    # Have readline treat the candidates as file names.
    compopt -o filenames
    # No -X filter is applied: this function never defines an exclusion
    # pattern, and referencing an unset one would pick up an unrelated
    # variable from the calling shell (or fail under 'set -u').
    COMPREPLY=( $(compgen -f -- "${cur}") $(compgen -d -- "${cur}") )
}
complete -F _tsv_split tsv-split

_tsv_summarize()
{
local cur prev opts
Expand Down
2 changes: 1 addition & 1 deletion common/src/tsv_utils/common/utils.d
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOututRange,
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.

Utilities in this file:
Expand Down
2 changes: 1 addition & 1 deletion csv2tsv/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/sh
#!/usr/bin/env bash

# Note: Majority of testing for this app is in the unit tests built into the code.
# These tests do some basic, plus file handling and error cases.
Expand Down
1 change: 1 addition & 0 deletions dub.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"./tsv-pretty/",
"./tsv-sample/",
"./tsv-select/",
"./tsv-split/",
"./tsv-summarize/",
"./tsv-uniq/"
],
Expand Down
2 changes: 1 addition & 1 deletion dub_build.d
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int main(string[] args) {

// Note: At present 'common' is a source library and does not need a standalone compilation step.
auto packageName = "tsv-utils";
auto subPackages = ["csv2tsv", "keep-header", "number-lines", "tsv-append", "tsv-filter", "tsv-join", "tsv-pretty", "tsv-sample", "tsv-select", "tsv-summarize", "tsv-uniq"];
auto subPackages = ["csv2tsv", "keep-header", "number-lines", "tsv-append", "tsv-filter", "tsv-join", "tsv-pretty", "tsv-sample", "tsv-select", "tsv-split", "tsv-summarize", "tsv-uniq"];
auto buildCmdArgs = ["dub", "build", "<package>", "--force", "-b"];
buildCmdArgs ~= debugBuild ? "debug" : "release";
if (compiler.length > 0) {
Expand Down
2 changes: 1 addition & 1 deletion keep-header/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/sh
#!/usr/bin/env bash

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output directory are required."
Expand Down
2 changes: 1 addition & 1 deletion makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
appdirs = csv2tsv keep-header number-lines tsv-append tsv-filter tsv-join tsv-pretty tsv-sample tsv-select tsv-summarize tsv-uniq
appdirs = csv2tsv keep-header number-lines tsv-append tsv-filter tsv-join tsv-pretty tsv-sample tsv-select tsv-split tsv-summarize tsv-uniq
subdirs = common $(appdirs)
buildtools_dir = buildtools

Expand Down
4 changes: 2 additions & 2 deletions number-lines/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /bin/sh
#!/usr/bin/env bash

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
4 changes: 2 additions & 2 deletions tsv-append/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/sh
#!/usr/bin/env bash

## Most tsv-append testing is done as unit tests. Tests executed by this script are
## run against the final executable. This provides a sanity check that the
Expand All @@ -7,7 +7,7 @@
## Instead, these tests focus on areas that are hard to test in unit tests.

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
4 changes: 2 additions & 2 deletions tsv-filter/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /bin/sh
#!/usr/bin/env bash

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
4 changes: 2 additions & 2 deletions tsv-join/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /bin/sh
#!/usr/bin/env bash

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
4 changes: 2 additions & 2 deletions tsv-pretty/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#! /bin/sh
#!/usr/bin/env bash

## Command line tests of the build executable

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
2 changes: 1 addition & 1 deletion tsv-sample/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/sh
#!/usr/bin/env bash

## Most tsv-sample testing is done as unit tests. Tests executed by this script are run
## against the final executable. This provides a sanity check that the final executable
Expand Down
4 changes: 2 additions & 2 deletions tsv-select/tests/tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /bin/sh
#!/usr/bin/env bash

if [ $# -le 1 ]; then
echo "Insufficient arguments. A program name and output director are required."
echo "Insufficient arguments. A program name and output directory are required."
exit 1
fi

Expand Down
30 changes: 30 additions & 0 deletions tsv-split/dub.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "tsv-split",
"description": "Split a file into multiple files.",
"homepage": "https://github.com/eBay/tsv-utils",
"authors": ["Jon Degenhardt"],
"copyright": "Copyright (c) 2020, eBay Inc.",
"license": "BSL-1.0",
"targetType": "executable",
"configurations": [
{
"name" : "executable",
"targetName": "tsv-split",
"targetPath": "../bin/",
"mainSourceFile": "src/tsv_utils/tsv-split.d",
"dependencies": {
"tsv-utils:common": { "path": ".." }
}
},
{
"name": "unittest",
"targetType": "none"
}
],
"buildTypes": {
"debug": { "buildOptions": ["debugMode", "optimize"] },
"release": { "buildOptions": ["releaseMode", "optimize", "inline"],
"dflags": ["-boundscheck=off"],
"dflags-osx-ldc": ["-flto=thin"] }
}
}
4 changes: 4 additions & 0 deletions tsv-split/makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Makefile for the tsv-split application.
#
# PGO (profile-guided optimization) is disabled for now due to LDC compilation
# failures starting with LDC 1.12.0. The commented-out setting below is
# presumably what turns it back on -- confirm against ../makeapp.mk first.
# APP_USES_LDC_PGO=2
# Makefile fragments pulled in from the parent directory.
include ../makedefs.mk
include ../makeapp.mk
57 changes: 57 additions & 0 deletions tsv-split/profile_data/collect_profile_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#! /bin/sh

# Collects profile data for tsv-split.
#
# Runs the given (instrumented) program over the profile_data_*.tsv inputs with
# a variety of option combinations, then merges the profile.*.raw files found
# in the current directory into app.profdata using ldc-profdata.
#
# Usage: collect_profile_data.sh <instrumented-program> [<ldc-dir>]
#   <instrumented-program>  Path of the program to run.
#   <ldc-dir>               Optional. When given, <ldc-dir>/bin/ldc-profdata is
#                           used; otherwise ldc-profdata is looked up on PATH.
#
# All data files and outputs are addressed by relative path, so the script
# must be run from the directory containing the profile_data_*.tsv files.

if [ $# -eq 0 ]; then
    echo "Insufficient arguments. The path of the instrumented program is required."
    exit 1
fi

prog=$1
shift

ldc_profdata_tool_name=ldc-profdata
ldc_profdata_tool=${ldc_profdata_tool_name}

if [ $# -ne 0 ]; then
    ldc_profdata_tool="${1}/bin/${ldc_profdata_tool_name}"
fi

# Remove raw profiles left over from a previous run. The -e test covers the
# case where the glob matched nothing and was left as a literal pattern.
for f in profile.*.raw; do
    if [ -e "$f" ]; then
        rm "$f"
    fi
done

if [ -e app.profdata ]; then
    rm -f app.profdata
fi

mkdir -p odir

# Runs the program once, writing its split files to odir, then empties odir
# so the next run starts clean. Arguments are passed through to the program
# after '--dir odir'. Input may also arrive on standard input.
run_split() {
    "$prog" --dir odir "$@"
    rm odir/*
}

run_split profile_data_1.tsv --lines-per-file 10
run_split profile_data_1.tsv --lines-per-file 100
run_split profile_data_1.tsv --num-files 5
run_split profile_data_1.tsv --num-files 50
run_split profile_data_1.tsv --num-files 5 -k 1
run_split profile_data_1.tsv --num-files 50 -k 1

run_split profile_data_2.tsv --lines-per-file 500
run_split profile_data_2.tsv --lines-per-file 20 -H
run_split profile_data_2.tsv --num-files 100 -H
run_split profile_data_2.tsv --num-files 100 -I
run_split profile_data_2.tsv --num-files 5 -k 1
# These runs feed the data through a pipe rather than naming the file,
# presumably so that reading from standard input is profiled as well.
cat profile_data_2.tsv | run_split --num-files 100 -k 1 -I
cat profile_data_2.tsv | run_split --num-files 100 -k 2,4 -H
cat profile_data_2.tsv | run_split --num-files 100 -k 1,3

run_split profile_data_3.tsv --lines-per-file 300
run_split profile_data_3.tsv --num-files 200 --max-open-files 20
run_split profile_data_3.tsv --num-files 200 -k 4 --max-open-files 20
run_split profile_data_3.tsv --num-files 200 -k 2,3 --max-open-files 100

run_split --lines-per-file 1000 profile_data_1.tsv profile_data_2.tsv profile_data_3.tsv
run_split --num-files 100 profile_data_1.tsv profile_data_2.tsv profile_data_3.tsv
run_split --num-files 100 -k 1,2 profile_data_1.tsv profile_data_2.tsv profile_data_3.tsv

rmdir odir

# The glob is intentionally unquoted: it expands to every raw profile present.
"${ldc_profdata_tool}" merge -o app.profdata profile.*.raw
Loading